Show the code
library(targets)
library(tidyverse)
library(ggokabeito)
library(easystats)
library(gt)
library(ggfittext)
library(scales)
library(visdat)
library(collapse)
library(ggpubr)
library(knitr)Dieser Arbeitsbericht schildert das technische Vorgehen im Rahmen der Analyse der Matomo-Daten des BMBF-Projekt “HaNS”.
Die Matomo-Klickdaten aller Semester der Projektlaufzeit wurden für diese Analyse verarbeitet. Mit Hilfe einer R-Pipeline wurden eine Reihe von Forschungsfragen analysiert.
Der komplette Code ist online dokumentiert unter https://github.com/sebastiansauer/hans. Aus Datenschutzgründen sind online keine Daten eingestellt.
Die zentrale Analyse-Pipeline-Datei ist https://github.com/sebastiansauer/hans/blob/main/_targets.R.
library(targets)
library(tidyverse)
library(ggokabeito)
library(easystats)
library(gt)
library(ggfittext)
library(scales)
library(visdat)
library(collapse)
library(ggpubr)
library(knitr)theme_set(theme_minimal())options(lubridate.week.start = 1) # Monday as first day
#options(collapse_mask = "all") # use collapse for all dplyr operations
options(chromote.headless = "new") # Chrome headleass needed for gtsaveImmer knitr::kable für das Zeigen von Data frames verwenden:
Die Analyse wird im Rahmen einer Targets-Pipeline beschrieben und ist offen auf Github einsehbar.
Aufgrund des “rechts flatternden” Datenformat (d.h. unterschiedliche Zeilenlängen) wurden die Daten in ein Langformat überführt, zwecks besserer/einfacherer Analyse.
Dazu wurden (neben den ID-Variablen, v.a. idvisit) die actionDetails_-Variablen verwendet. Der Code des Pivotierens in das Langformat ist in der Funktion longify-data.R einsehbar.
Die Daten im Langformat wurden dann noch etwas aufbereitet mt der Funktion slimify-data.R.
tar_load(data_slim_filtered_head)
data_slim_filtered_head |>
head(30) |>
kable()| nr | type | value | idvisit |
|---|---|---|---|
| 0 | subtitle | https://hans.th-nuernberg.de/login | 1 |
| 0 | subtitle | https://hans.th-nuernberg.de/login?evalId=none&user=undefined&role=undefined | 1 |
| 0 | subtitle | https://hans.th-nuernberg.de/ | 1 |
| 0 | subtitle | https://hans.th-nuernberg.de/login?evalId=none&role=undefined | 1 |
| 0 | timestamp | 2023-03-23 14:20:12 | 1 |
| 0 | timestamp | 2024-06-06 11:19:16 | 1 |
| 0 | timestamp | 2022-12-05 15:33:45 | 1 |
| 0 | timestamp | 2023-10-04 16:19:46 | 1 |
| 1 | eventcategory | click_videocard | 1 |
| 1 | eventcategory | login | 1 |
| 1 | eventaction | Einführung in HAnS | 1 |
| 1 | eventaction | success | 1 |
| 1 | timestamp | 2023-03-23 14:20:26 | 1 |
| 1 | timestamp | 2024-06-06 11:21:07 | 1 |
| 1 | timestamp | 2022-12-05 15:33:49 | 1 |
| 1 | timestamp | 2023-10-04 16:19:54 | 1 |
| 1 | subtitle | https://hans.th-nuernberg.de/ | 1 |
| 1 | subtitle | https://hans.th-nuernberg.de/login?evalId=none&user=undefined&role=undefined | 1 |
| 1 | subtitle | Category: ““click_videocard’, Action:”“Einführung in HAnS”” | 1 |
| 1 | subtitle | Category: ““login’, Action:”“success”” | 1 |
| 2 | subtitle | Category: ““click_toggle’, Action:”“Evaluation”” | 1 |
| 2 | subtitle | https://hans.th-nuernberg.de/video-player?uuid=b57c1dfe-a667-48a6-b43d-dac7e600ae8c | 1 |
| 2 | subtitle | https://hans.th-nuernberg.de/?evalId=none&role=developer | 1 |
| 2 | timestamp | 2023-03-23 14:20:28 | 1 |
| 2 | timestamp | 2022-12-05 15:33:49 | 1 |
| 2 | timestamp | 2023-10-04 16:19:54 | 1 |
| 2 | eventcategory | click_toggle | 1 |
| 2 | eventaction | Evaluation | 1 |
| 3 | eventcategory | click_button | 1 |
| 3 | eventcategory | videoplayer_click | 1 |
tar_load(data_all_fct)Der Roh-Datensatz verfügt über
Jede Zeile entspricht einem “Visit”.
data_all_fct_head100 <-
data_all_fct %>%
select(1:100) %>%
slice_head(n = 100) data_all_fct_head100 %>%
visdat::vis_dat()data_all_fct_head100 %>%
names() [1] "file_id"
[2] "idvisit"
[3] "visitip"
[4] "visitorid"
[5] "fingerprint"
[6] "actiondetails_0_type"
[7] "actiondetails_0_url"
[8] "actiondetails_0_pageidaction"
[9] "actiondetails_0_idpageview"
[10] "actiondetails_0_servertimepretty"
[11] "actiondetails_0_pageid"
[12] "actiondetails_0_sitesearchkeyword"
[13] "actiondetails_0_sitesearchcount"
[14] "actiondetails_0_pageviewposition"
[15] "actiondetails_0_title"
[16] "actiondetails_0_subtitle"
[17] "actiondetails_0_timestamp"
[18] "sitecurrency"
[19] "sitecurrencysymbol"
[20] "serverdate"
[21] "visitserverhour"
[22] "lastactiontimestamp"
[23] "lastactiondatetime"
[24] "servertimestamp"
[25] "firstactiontimestamp"
[26] "servertimepretty"
[27] "serverdatepretty"
[28] "serverdateprettyfirstaction"
[29] "servertimeprettyfirstaction"
[30] "visitortype"
[31] "visitcount"
[32] "dayssincefirstvisit"
[33] "secondssincefirstvisit"
[34] "visitduration"
[35] "visitdurationpretty"
[36] "searches"
[37] "actions"
[38] "interactions"
[39] "referrertype"
[40] "referrertypename"
[41] "referrername"
[42] "referrerurl"
[43] "referrersearchengineurl"
[44] "languagecode"
[45] "language"
[46] "devicetype"
[47] "devicebrand"
[48] "devicemodel"
[49] "operatingsystem"
[50] "operatingsystemname"
[51] "operatingsystemcode"
[52] "operatingsystemversion"
[53] "browserfamily"
[54] "browserfamilydescription"
[55] "browser"
[56] "browsername"
[57] "browsercode"
[58] "browserversion"
[59] "events"
[60] "continent"
[61] "continentcode"
[62] "country"
[63] "countrycode"
[64] "countryflag"
[65] "region"
[66] "regioncode"
[67] "city"
[68] "location"
[69] "latitude"
[70] "longitude"
[71] "visitlocaltime"
[72] "visitlocalhour"
[73] "dayssincelastvisit"
[74] "secondssincelastvisit"
[75] "resolution"
[76] "plugins"
[77] "provider"
[78] "providername"
[79] "providerurl"
[80] "actiondetails_0_pageloadtime"
[81] "actiondetails_0_timespent"
[82] "actiondetails_0_timespentpretty"
[83] "actiondetails_0_pageloadtimemilliseconds"
[84] "actiondetails_1_type"
[85] "actiondetails_1_url"
[86] "actiondetails_1_pageidaction"
[87] "actiondetails_1_idpageview"
[88] "actiondetails_1_servertimepretty"
[89] "actiondetails_1_pageid"
[90] "actiondetails_1_eventcategory"
[91] "actiondetails_1_eventaction"
[92] "actiondetails_1_pageviewposition"
[93] "actiondetails_1_timestamp"
[94] "actiondetails_1_title"
[95] "actiondetails_1_subtitle"
[96] "actiondetails_2_type"
[97] "actiondetails_2_url"
[98] "actiondetails_2_pageidaction"
[99] "actiondetails_2_idpageview"
[100] "actiondetails_2_servertimepretty"
data_all_fct_head100 %>%
glimpse()Rows: 100
Columns: 100
$ file_id <fct> matomo_export_2023-03-23.csv,…
$ idvisit <fct> 17, 16, 15, 13, 14, 11, 10, 1…
$ visitip <fct> 87.150.0.0, 217.84.0.0, 90.18…
$ visitorid <fct> 9b735f0eb17af6a8, c77ae8b840d…
$ fingerprint <fct> 86408e41c606f8f7, 0584640e01c…
$ actiondetails_0_type <fct> search, action, action, actio…
$ actiondetails_0_url <fct> NA, https://hans.th-nuernberg…
$ actiondetails_0_pageidaction <fct> NA, 2, 2, 3, 2, 9, 3, 4, 2, 4…
$ actiondetails_0_idpageview <fct> NA, PlZbGn, 0f7b07, A6owU2, h…
$ actiondetails_0_servertimepretty <fct> "Mar 23, 2023 21:18:53", "Mar…
$ actiondetails_0_pageid <fct> 325, 324, 323, 164, 176, 145,…
$ actiondetails_0_sitesearchkeyword <fct> "GDI", NA, NA, NA, NA, NA, NA…
$ actiondetails_0_sitesearchcount <fct> 0, NA, NA, NA, NA, NA, NA, NA…
$ actiondetails_0_pageviewposition <fct> 1, 1, 1, 1, 1, 1, 1, NA, 1, N…
$ actiondetails_0_title <fct> Site Search, HAnS, HAnS, HAnS…
$ actiondetails_0_subtitle <fct> "GDI", "https://hans.th-nuern…
$ actiondetails_0_timestamp <fct> 2023-03-23 21:18:53, 2023-03-…
$ sitecurrency <fct> USD, USD, USD, USD, USD, USD,…
$ sitecurrencysymbol <fct> $, $, $, $, $, $, $, $, $, $,…
$ serverdate <fct> 2023-03-23, 2023-03-23, 2023-…
$ visitserverhour <fct> 20, 20, 18, 18, 17, 17, 16, 1…
$ lastactiontimestamp <fct> 2023-03-23 20:18:53, 2023-03-…
$ lastactiondatetime <fct> 2023-03-23 20:18:53, 2023-03-…
$ servertimestamp <fct> 2023-03-23 20:18:53, 2023-03-…
$ firstactiontimestamp <fct> 2023-03-23 20:18:53, 2023-03-…
$ servertimepretty <fct> 21:18:53, 21:01:30, 19:56:41,…
$ serverdatepretty <fct> "Thursday, March 23, 2023", "…
$ serverdateprettyfirstaction <fct> "Thursday, March 23, 2023", "…
$ servertimeprettyfirstaction <fct> 21:18:53, 21:01:30, 19:56:41,…
$ visitortype <fct> new, new, new, returning, new…
$ visitcount <fct> 1, 1, 1, 4, 1, 3, 1, 3, 1, 2,…
$ dayssincefirstvisit <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ secondssincefirstvisit <fct> 0, 0, 0, 15464, 0, 10678, 0, …
$ visitduration <fct> 0, 20, 3, 1494, 0, 2555, 79, …
$ visitdurationpretty <fct> 0s, 20s, 3s, 24 min 54s, 0s, …
$ searches <fct> 1, 0, 0, 7, 0, 2, 1, 0, 0, 9,…
$ actions <fct> 1, 1, 1, 158, 1, 12, 9, 1, 1,…
$ interactions <fct> 1, 1, 1, 38, 1, 7, 5, 0, 1, 5…
$ referrertype <fct> website, website, website, di…
$ referrertypename <fct> Websites, Websites, Websites,…
$ referrername <fct> elearning.ohmportal.de, elear…
$ referrerurl <fct> https://elearning.ohmportal.d…
$ referrersearchengineurl <fct> NA, NA, NA, NA, NA, NA, NA, N…
$ languagecode <fct> de-de, de, de-de, en-us, de-d…
$ language <fct> Language code de-de, German, …
$ devicetype <fct> Desktop, Desktop, Desktop, De…
$ devicebrand <fct> Apple, Unknown, Unknown, Appl…
$ devicemodel <fct> Generic Desktop, Generic Desk…
$ operatingsystem <fct> Mac 10.15, Windows 11, Window…
$ operatingsystemname <fct> Mac, Windows, Windows, Mac, i…
$ operatingsystemcode <fct> MAC, WIN, WIN, MAC, IOS, MAC,…
$ operatingsystemversion <fct> 10.15, 11, 10, 10.15, 16.3, 1…
$ browserfamily <fct> WebKit, Blink, Blink, Gecko, …
$ browserfamilydescription <fct> "WebKit (Safari)", "Blink (Ch…
$ browser <fct> Safari 16.3, Microsoft Edge 1…
$ browsername <fct> Safari, Microsoft Edge, Chrom…
$ browsercode <fct> SF, PS, CH, FF, MF, FF, PS, F…
$ browserversion <fct> 16.3, 111, 111, 111, 16.3, 11…
$ events <fct> 0, 0, 0, 120, 0, 5, 4, 1, 0, …
$ continent <fct> Europe, Europe, Europe, North…
$ continentcode <fct> eur, eur, eur, amn, eur, amn,…
$ country <fct> Germany, Germany, Germany, Un…
$ countrycode <fct> de, de, de, us, de, us, de, u…
$ countryflag <fct> plugins/Morpheus/icons/dist/f…
$ region <fct> NA, NA, NA, NA, NA, NA, NA, N…
$ regioncode <fct> NA, NA, NA, NA, NA, NA, NA, N…
$ city <fct> NA, NA, NA, NA, NA, NA, NA, N…
$ location <fct> "Germany", "Germany", "German…
$ latitude <fct> NA, NA, NA, NA, NA, NA, NA, N…
$ longitude <fct> NA, NA, NA, NA, NA, NA, NA, N…
$ visitlocaltime <fct> 21:18:53, 21:01:29, 19:56:40,…
$ visitlocalhour <fct> 21, 21, 19, 18, 18, 17, 17, 1…
$ dayssincelastvisit <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ secondssincelastvisit <fct> 0, 0, 0, 4786, 0, 7331, 0, 73…
$ resolution <fct> 810x1080, 1536x864, 1920x1080…
$ plugins <fct> "cookie", "cookie, pdf", "coo…
$ provider <fct> NA, NA, NA, NA, NA, NA, NA, N…
$ providername <fct> "Unknown", "Unknown", "Unknow…
$ providerurl <fct> NA, NA, NA, NA, NA, NA, NA, N…
$ actiondetails_0_pageloadtime <fct> NA, 2.23s, 0.55s, NA, 1s, NA,…
$ actiondetails_0_timespent <fct> NA, 20, 0, 3, 0, 25, 4, NA, 0…
$ actiondetails_0_timespentpretty <fct> NA, 20s, 0s, 3s, 0s, 25s, 4s,…
$ actiondetails_0_pageloadtimemilliseconds <fct> NA, 2233, 550, NA, 1001, NA, …
$ actiondetails_1_type <fct> NA, NA, NA, event, NA, event,…
$ actiondetails_1_url <fct> NA, NA, NA, "https://hans.th-…
$ actiondetails_1_pageidaction <fct> NA, NA, NA, 10, NA, 10, 4, NA…
$ actiondetails_1_idpageview <fct> NA, NA, NA, w2vQcb, NA, ZZd1q…
$ actiondetails_1_servertimepretty <fct> NA, NA, NA, "Mar 23, 2023 18:…
$ actiondetails_1_pageid <fct> NA, NA, NA, 165, NA, 147, 143…
$ actiondetails_1_eventcategory <fct> NA, NA, NA, click_button, NA,…
$ actiondetails_1_eventaction <fct> NA, NA, NA, "Medien", NA, "ET…
$ actiondetails_1_pageviewposition <fct> NA, NA, NA, 1, NA, 1, 1, NA, …
$ actiondetails_1_timestamp <fct> NA, NA, NA, 2023-03-23 18:37:…
$ actiondetails_1_title <fct> NA, NA, NA, Event, NA, Event,…
$ actiondetails_1_subtitle <fct> NA, NA, NA, "Category: \"\"cl…
$ actiondetails_2_type <fct> NA, NA, NA, action, NA, searc…
$ actiondetails_2_url <fct> NA, NA, NA, "https://hans.th-…
$ actiondetails_2_pageidaction <fct> NA, NA, NA, 9, NA, NA, 2, NA,…
$ actiondetails_2_idpageview <fct> NA, NA, NA, EGozRT, NA, ZZd1q…
$ actiondetails_2_servertimepretty <fct> NA, NA, NA, "Mar 23, 2023 18:…
tar_load(data_slim_filtered)
data_slim_filtered %>%
slice(1:100) |>
gt()| nr | type | value | idvisit |
|---|---|---|---|
| 0 | subtitle | https://hans.th-nuernberg.de/login | 1 |
| 0 | subtitle | https://hans.th-nuernberg.de/login?evalId=none&user=undefined&role=undefined | 1 |
| 0 | subtitle | https://hans.th-nuernberg.de/ | 1 |
| 0 | subtitle | https://hans.th-nuernberg.de/login?evalId=none&role=undefined | 1 |
| 0 | timestamp | 2023-03-23 14:20:12 | 1 |
| 0 | timestamp | 2024-06-06 11:19:16 | 1 |
| 0 | timestamp | 2022-12-05 15:33:45 | 1 |
| 0 | timestamp | 2023-10-04 16:19:46 | 1 |
| 1 | eventcategory | click_videocard | 1 |
| 1 | eventcategory | login | 1 |
| 1 | eventaction | Einführung in HAnS | 1 |
| 1 | eventaction | success | 1 |
| 1 | timestamp | 2023-03-23 14:20:26 | 1 |
| 1 | timestamp | 2024-06-06 11:21:07 | 1 |
| 1 | timestamp | 2022-12-05 15:33:49 | 1 |
| 1 | timestamp | 2023-10-04 16:19:54 | 1 |
| 1 | subtitle | https://hans.th-nuernberg.de/ | 1 |
| 1 | subtitle | https://hans.th-nuernberg.de/login?evalId=none&user=undefined&role=undefined | 1 |
| 1 | subtitle | Category: ""click_videocard', Action: ""Einführung in HAnS"" | 1 |
| 1 | subtitle | Category: ""login', Action: ""success"" | 1 |
| 2 | subtitle | Category: ""click_toggle', Action: ""Evaluation"" | 1 |
| 2 | subtitle | https://hans.th-nuernberg.de/video-player?uuid=b57c1dfe-a667-48a6-b43d-dac7e600ae8c | 1 |
| 2 | subtitle | https://hans.th-nuernberg.de/?evalId=none&role=developer | 1 |
| 2 | timestamp | 2023-03-23 14:20:28 | 1 |
| 2 | timestamp | 2022-12-05 15:33:49 | 1 |
| 2 | timestamp | 2023-10-04 16:19:54 | 1 |
| 2 | eventcategory | click_toggle | 1 |
| 2 | eventaction | Evaluation | 1 |
| 3 | eventcategory | click_button | 1 |
| 3 | eventcategory | videoplayer_click | 1 |
| 3 | eventcategory | click_button | 1 |
| 3 | eventaction | Kanäle | 1 |
| 3 | eventaction | play | 1 |
| 3 | eventaction | Kanäle | 1 |
| 3 | timestamp | 2023-03-23 14:20:30 | 1 |
| 3 | timestamp | 2022-12-05 15:33:49 | 1 |
| 3 | timestamp | 2023-10-04 16:19:56 | 1 |
| 3 | subtitle | Category: ""click_button', Action: ""Kanäle"" | 1 |
| 3 | subtitle | Category: ""videoplayer_click', Action: ""play"" | 1 |
| 3 | subtitle | Category: ""click_button', Action: ""Kanäle"" | 1 |
| 4 | subtitle | https://hans.th-nuernberg.de/channels | 1 |
| 4 | subtitle | https://hans.th-nuernberg.de/ | 1 |
| 4 | subtitle | https://hans.th-nuernberg.de/channels?evalId=none&role=developer | 1 |
| 4 | timestamp | 2023-03-23 14:20:30 | 1 |
| 4 | timestamp | 2022-12-05 15:33:52 | 1 |
| 4 | timestamp | 2023-10-04 16:19:56 | 1 |
| 5 | eventcategory | click_button | 1 |
| 5 | eventcategory | click_button | 1 |
| 5 | eventaction | Medien | 1 |
| 5 | eventaction | Medien | 1 |
| 5 | timestamp | 2023-03-23 14:20:31 | 1 |
| 5 | timestamp | 2022-12-05 15:33:52 | 1 |
| 5 | timestamp | 2023-10-04 16:21:23 | 1 |
| 5 | subtitle | https://hans.th-nuernberg.de/ | 1 |
| 5 | subtitle | Category: ""click_button', Action: ""Medien"" | 1 |
| 5 | subtitle | Category: ""click_button', Action: ""Medien"" | 1 |
| 5 | pagetitle | HAnS | 1 |
| 6 | eventcategory | click_button | 1 |
| 6 | eventaction | Medien | 1 |
| 6 | timestamp | 2023-03-23 14:20:31 | 1 |
| 6 | timestamp | 2022-12-05 15:55:43 | 1 |
| 6 | timestamp | 2023-10-04 16:21:23 | 1 |
| 6 | subtitle | Category: ""click_button', Action: ""Medien"" | 1 |
| 6 | subtitle | https://hans.th-nuernberg.de/ | 1 |
| 6 | subtitle | https://hans.th-nuernberg.de/?evalId=none&role=developer | 1 |
| 6 | pagetitle | HAnS | 1 |
| 6 | pagetitle | HAnS | 1 |
| 7 | pagetitle | HAnS | 1 |
| 7 | subtitle | Category: ""click_toggle', Action: ""Evaluation"" | 1 |
| 7 | subtitle | https://hans.th-nuernberg.de/ | 1 |
| 7 | subtitle | Category: ""click_button', Action: ""Medien"" | 1 |
| 7 | timestamp | 2023-03-23 14:20:34 | 1 |
| 7 | timestamp | 2022-12-05 16:03:01 | 1 |
| 7 | timestamp | 2023-10-04 16:25:22 | 1 |
| 7 | eventcategory | click_toggle | 1 |
| 7 | eventcategory | click_button | 1 |
| 7 | eventaction | Evaluation | 1 |
| 7 | eventaction | Medien | 1 |
| 8 | subtitle | https://hans.th-nuernberg.de/ | 1 |
| 8 | subtitle | https://hans.th-nuernberg.de/ | 1 |
| 8 | subtitle | Category: ""click_button', Action: ""Kanäle"" | 1 |
| 8 | timestamp | 2023-03-23 14:21:47 | 1 |
| 8 | timestamp | 2022-12-05 16:03:06 | 1 |
| 8 | timestamp | 2023-10-04 16:25:23 | 1 |
| 8 | eventcategory | click_button | 1 |
| 8 | eventaction | Kanäle | 1 |
| 9 | eventcategory | click_toggle | 1 |
| 9 | eventaction | Evaluation | 1 |
| 9 | timestamp | 2023-03-23 14:22:41 | 1 |
| 9 | timestamp | 2022-12-05 16:03:16 | 1 |
| 9 | timestamp | 2023-10-04 16:25:23 | 1 |
| 9 | subtitle | Category: ""click_toggle', Action: ""Evaluation"" | 1 |
| 9 | subtitle | https://hans.th-nuernberg.de/video-player?uuid=b57c1dfe-a667-48a6-b43d-dac7e600ae8c | 1 |
| 9 | subtitle | https://hans.th-nuernberg.de/channels?evalId=none&role=developer | 1 |
| 10 | eventcategory | videoplayer_click | 1 |
| 10 | eventaction | play | 1 |
| 10 | timestamp | 2023-03-23 14:22:45 | 1 |
| 10 | timestamp | 2022-12-05 16:03:16 | 1 |
| 10 | timestamp | 2023-10-04 16:25:24 | 1 |
| 10 | subtitle | https://hans.th-nuernberg.de/channels | 1 |
Entfernt man Developer, Admins und Lecturers aus dem Roh-Datensatz so bleiben weniger Zeilen übrig:
tar_load(data_users_only)tar_load(count_action)
count_action |>
head(30) |>
kable()| idvisit | nr_max |
|---|---|
| 1 | 90 |
| 2 | 5 |
| 3 | 29 |
| 4 | 340 |
| 5 | 265 |
| 6 | 32 |
| 7 | 122 |
| 8 | 1 |
| 9 | 39 |
| 10 | 56 |
| 11 | 177 |
| 12 | 266 |
| 13 | 157 |
| 14 | 118 |
| 15 | 158 |
| 16 | 117 |
| 17 | 499 |
| 18 | 499 |
| 19 | 164 |
| 20 | 22 |
| 21 | 499 |
| 22 | 163 |
| 23 | 49 |
| 24 | 14 |
| 25 | 25 |
| 26 | 59 |
| 27 | 151 |
| 28 | 23 |
| 29 | 45 |
| 30 | 133 |
tar_load(config)tar_load(time_minmax)
time_minmax |>
head(30) |>
kable()| idvisit | time_min | time_max |
|---|---|---|
| 1 | 2022-12-05 15:33:45 | 2024-06-06 11:21:07 |
| 2 | 2022-12-05 15:35:45 | 2023-03-23 14:31:36 |
| 3 | 2024-03-04 10:25:00 | 2024-06-06 11:50:30 |
| 4 | 2022-12-05 16:20:05 | 2024-10-07 12:20:29 |
| 5 | 2022-12-05 16:34:34 | 2023-10-04 20:56:37 |
| 6 | 2022-12-05 16:42:53 | 2024-06-06 12:40:58 |
| 7 | 2022-12-05 17:06:08 | 2024-10-07 14:34:33 |
| 8 | 2024-03-04 13:33:11 | 2024-06-06 13:13:16 |
| 9 | 2022-12-05 22:30:01 | 2024-06-06 13:31:33 |
| 10 | 2023-03-23 17:17:36 | 2024-06-06 13:34:51 |
| 11 | 2022-12-06 08:09:01 | 2024-10-07 15:13:15 |
| 12 | 2022-12-06 09:26:13 | 2024-06-06 16:22:08 |
| 13 | 2022-12-06 10:10:10 | 2024-10-07 16:59:17 |
| 14 | 2022-12-06 12:04:24 | 2024-06-06 15:47:58 |
| 15 | 2022-12-06 13:10:12 | 2024-10-08 08:28:21 |
| 16 | 2024-06-06 16:30:32 | 2024-06-06 17:23:23 |
| 17 | 2022-12-06 13:59:27 | 2024-10-08 11:56:22 |
| 18 | 2022-12-06 14:35:57 | 2024-06-06 18:12:59 |
| 19 | 2022-12-06 14:39:14 | 2024-06-06 20:12:09 |
| 20 | 2022-12-06 16:17:59 | 2024-06-06 21:22:11 |
| 21 | 2022-12-06 16:31:52 | 2024-10-08 14:40:45 |
| 22 | 2022-12-06 17:06:34 | 2024-10-08 15:54:44 |
| 23 | 2022-12-06 17:26:27 | 2024-06-07 09:04:56 |
| 24 | 2022-12-06 17:33:51 | 2024-10-08 15:55:58 |
| 25 | 2023-03-24 09:59:37 | 2024-06-07 10:42:23 |
| 26 | 2023-03-24 10:32:49 | 2024-06-07 10:42:53 |
| 27 | 2022-12-06 20:38:32 | 2024-06-07 13:49:34 |
| 28 | 2022-12-06 20:53:53 | 2024-06-07 11:52:27 |
| 29 | 2022-12-06 22:39:05 | 2024-06-07 13:40:09 |
| 30 | 2022-12-07 08:35:31 | 2024-10-08 18:00:01 |
time_minmax |>
summarise(time_min = min(time_min, na.rm = T),
time_max = max(time_max, na.rm = T)) |>
gt()| time_min | time_max |
|---|---|
| 2022-12-05 15:33:45 | 2025-03-03 14:08:54 |
Diese Statistik wurde auf Basis des Datenobjekts data_slim_filtered berechnet, vgl. das Target dieses Objekts in der Pipeline.
tar_load(time_visit_wday)
tar_load(time_since_last_visit)time_visit_wday |>
head(30) |>
kable()| idvisit | dow | hour | date_time |
|---|---|---|---|
| 1 | 4 | 14 | 2023-03-23 14:20:12 |
| 2 | 4 | 14 | 2023-03-23 14:29:42 |
| 3 | 1 | 10 | 2024-03-04 10:25:00 |
| 4 | 4 | 15 | 2023-03-23 15:15:59 |
| 5 | 1 | 16 | 2022-12-05 16:34:34 |
| 6 | 4 | 12 | 2024-06-06 12:05:49 |
| 7 | 1 | 17 | 2022-12-05 17:06:08 |
| 8 | 1 | 13 | 2024-03-04 13:33:11 |
| 9 | 4 | 13 | 2024-06-06 13:30:26 |
| 10 | 4 | 17 | 2023-03-23 17:17:36 |
| 11 | 4 | 17 | 2023-03-23 17:18:10 |
| 12 | 1 | 16 | 2024-03-04 16:47:24 |
| 13 | 4 | 18 | 2023-03-23 18:37:56 |
| 14 | 4 | 15 | 2024-06-06 15:47:37 |
| 15 | 4 | 15 | 2024-06-06 15:57:14 |
| 16 | 4 | 16 | 2024-06-06 16:30:32 |
| 17 | 4 | 17 | 2024-06-06 17:31:43 |
| 18 | 5 | 9 | 2023-03-24 09:23:24 |
| 19 | 4 | 20 | 2024-06-06 20:12:04 |
| 20 | 5 | 9 | 2023-03-24 09:33:57 |
| 21 | 5 | 7 | 2024-06-07 07:56:34 |
| 22 | 2 | 8 | 2024-03-05 08:31:43 |
| 23 | 5 | 9 | 2023-03-24 09:57:44 |
| 24 | 5 | 9 | 2023-03-24 09:57:56 |
| 25 | 5 | 9 | 2023-03-24 09:59:37 |
| 26 | 5 | 10 | 2023-03-24 10:32:49 |
| 27 | 5 | 11 | 2024-06-07 11:09:46 |
| 28 | 5 | 10 | 2023-03-24 10:39:07 |
| 29 | 5 | 11 | 2023-03-24 11:11:53 |
| 30 | 3 | 8 | 2022-12-07 08:35:31 |
time_since_last_visit <-
time_since_last_visit |>
mutate(dayssincelastvisit = as.numeric(dayssincelastvisit))
time_since_last_visit |>
datawizard::describe_distribution(dayssincelastvisit) |>
knitr::kable(digits = 2)| Variable | Mean | SD | IQR | Min | Max | Skewness | Kurtosis | n | n_Missing |
|---|---|---|---|---|---|---|---|---|---|
| dayssincelastvisit | 6.3 | 15.38 | 0 | 1 | 92 | 3.32 | 10.84 | 18924 | 162 |
time_since_last_visit |>
ggplot(aes(x=dayssincelastvisit)) +
geom_density()Wie viele Visits (von Hans) gab es?
time_visit_wday_summary <-
time_visit_wday |>
ungroup() |>
mutate(month_start = floor_date(date_time, "month")) |>
mutate(month_name = month(date_time, label = TRUE, abbr = FALSE),
month_num = month(date_time, label = FALSE),
year_num = year(date_time)) |>
group_by(year_num, month_num) |>
summarise(n = n())time_visit_wday_summary |>
gt()| month_num | n |
|---|---|
| 2022 | |
| 12 | 25 |
| 2023 | |
| 1 | 20 |
| 2 | 77 |
| 3 | 99 |
| 4 | 219 |
| 5 | 300 |
| 6 | 246 |
| 7 | 390 |
| 8 | 20 |
| 9 | 27 |
| 10 | 22 |
| 11 | 8 |
| 12 | 98 |
| 2024 | |
| 1 | 552 |
| 2 | 71 |
| 3 | 51 |
| 4 | 94 |
| 5 | 303 |
| 6 | 200 |
| 7 | 192 |
| 8 | 6 |
| 9 | 5 |
| 10 | 8 |
| 11 | 149 |
| 12 | 650 |
| 2025 | |
| 1 | 830 |
| 2 | 131 |
| 3 | 2 |
time_visit_wday_summary2 <-
time_visit_wday |>
ungroup() |>
mutate(month_start = floor_date(date_time, "month")) |>
mutate(month_name = month(date_time, label = TRUE, abbr = FALSE),
month_num = month(date_time, label = FALSE),
year_num = year(date_time)) |>
group_by(year_num, month_start) |>
summarise(n = n())
time_visit_wday_summary2 |>
ggplot(aes(x = month_start, y = n)) +
geom_line(group = 1, color = "grey60") +
geom_point() time_visit_wday_summary2 <-
time_visit_wday |>
ungroup() |>
mutate(month_start = floor_date(date_time, "month")) |>
mutate(month_name = month(date_time, label = TRUE, abbr = FALSE),
month_num = month(date_time, label = FALSE),
year_num = year(date_time)) |>
group_by(year_num, month_start) |>
summarise(n = n()) |>
ungroup() |>
mutate(n_cumsum = cumsum(n))
time_visit_wday_summary2 |>
ggplot(aes(x = month_start, y = n_cumsum)) +
geom_line(group = 1, color = "grey60") +
geom_point() Die folgenden Statistiken beruhen auf dem Datensatz data_slim_filtered:
glimpse(data_slim_filtered)Rows: 3,597,119
Columns: 4
$ nr <int> 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,…
$ type <fct> subtitle, subtitle, subtitle, subtitle, timestamp, timestamp, …
$ value <chr> "https://hans.th-nuernberg.de/login", "https://hans.th-nuernbe…
$ idvisit <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
nr fasst die Nummer der Aktion innerhalb eines bestimmten Visits.
tbl_count_action <-
count_action |>
describe_distribution(nr_max) |>
gt() |>
fmt_number(columns = where(is.numeric),
decimals = 2)
tbl_count_action| Variable | Mean | SD | IQR | Min | Max | Skewness | Kurtosis | n | n_Missing |
|---|---|---|---|---|---|---|---|---|---|
| nr_max | 129.35 | 152.20 | 161.00 | 1.00 | 499.00 | 1.40 | 0.77 | 4,795.00 | 0.00 |
gtsave(tbl_count_action, filename = "reports/tbl_count_action.png")
nr_max gibt den Maximalwert von nr zurück, sagt also, wie viele Aktionen maximal von einem Visitor ausgeführt wurden.
Betrachtet man die Anzahl der Aktionen pro Visitor näher, so fällt auf, dass der Maximalwert (499) sehr häufig vorkommt:
count_action |>
count(nr_max) |>
ggplot(aes(x = nr_max, y = n)) +
geom_col()Hier noch in einer anderen Darstellung:
count_action |>
count(nr_max) |>
ggplot(aes(x = nr_max, y = n)) +
geom_point()Der Maximalwert ist einfach auffällig häufig:
count_action |>
count(nr_max == 499) |>
gt()| nr_max == 499 | n |
|---|---|
| FALSE | 4386 |
| TRUE | 409 |
Es erscheint plausibel, dass der Maximalwert alle “gekappten” (zensierten, abgeschnittenen) Werte fasst, also viele Werte, die eigentlich größer wären (aber dann zensiert wurden).
count_action2 <-
count_action |>
filter(nr_max != 499)
count_action2 |>
describe_distribution(nr_max) |>
gt() |>
fmt_number(columns = where(is.numeric),
decimals = 2)| Variable | Mean | SD | IQR | Min | Max | Skewness | Kurtosis | n | n_Missing |
|---|---|---|---|---|---|---|---|---|---|
| nr_max | 94.89 | 106.74 | 122.00 | 1.00 | 496.00 | 1.56 | 2.01 | 4,386.00 | 0.00 |
count_action_avg = mean(count_action$nr_max)
count_action_sd = sd(count_action$nr_max)
count_action |>
ggplot() +
geom_histogram(aes(x = nr_max)) +
labs(x = "Anzahl von Aktionen pro Visit",
y = "n",
caption = "Der vertikale Strich zeigt den Mittelwert; der horizontale die SD") +
theme_minimal() +
geom_vline(xintercept = count_action_avg,
color = palette_okabe_ito()[1]) +
geom_segment(x = count_action_avg-count_action_sd,
y = 0,
xend = count_action_avg + count_action_sd,
yend = 0,
color = palette_okabe_ito()[2],
size = 2) +
annotate("label", x = count_action_avg, y = 1500, label = "MW") +
annotate("label", x = count_action_avg + count_action_sd, y = 0, label = "SD") #geom_label(aes(x = count_action_avg), y = 1, label = "Mean")count_action_avg2 = mean(count_action2$nr_max)
count_action_sd2 = sd(count_action2$nr_max)
count_action2 |>
ggplot() +
geom_histogram(aes(x = nr_max)) +
labs(x = "Anzahl von Aktionen pro Visit",
y = "n",
title = "Verteilung der User-Aktionen pro Visit",
caption = "Der vertikale Strich zeigt den Mittelwert; der horizontale die SD") +
theme_minimal() +
geom_vline(xintercept = count_action_avg2,
color = palette_okabe_ito()[1]) +
geom_segment(x = count_action_avg-count_action_sd2,
y = 0,
xend = count_action_avg2 + count_action_sd2,
yend = 0,
color = palette_okabe_ito()[2],
size = 2) +
annotate("label", x = count_action_avg2, y = 1500, label = "MW", vjust = "top") +
annotate("label", x = count_action_avg2 + count_action_sd2, y = 0, label = "SD", vjust = "bottom") #geom_label(aes(x = count_action_avg), y = 1, label = "Mean")Die Verweildauer wurde berechnet als Differenz zwischen kleinstem und größtem Datumszeitwert (POSixct) eines Visits, vgl. [Funktion diff_time](https://github.com/sebastiansauer/hans/blob/main/funs/diff_time.R). Diese Variable heißttime_diffim Objekttime_spent`.
Dabei wird das Objekt data_slim_filtered herangezogen, vgl. die Definition es Targets “time_spent” in der Targets-Pipeline.
tar_load(data_slim_filtered)
head(data_slim_filtered, 20) |> kable()| nr | type | value | idvisit |
|---|---|---|---|
| 0 | subtitle | https://hans.th-nuernberg.de/login | 1 |
| 0 | subtitle | https://hans.th-nuernberg.de/login?evalId=none&user=undefined&role=undefined | 1 |
| 0 | subtitle | https://hans.th-nuernberg.de/ | 1 |
| 0 | subtitle | https://hans.th-nuernberg.de/login?evalId=none&role=undefined | 1 |
| 0 | timestamp | 2023-03-23 14:20:12 | 1 |
| 0 | timestamp | 2024-06-06 11:19:16 | 1 |
| 0 | timestamp | 2022-12-05 15:33:45 | 1 |
| 0 | timestamp | 2023-10-04 16:19:46 | 1 |
| 1 | eventcategory | click_videocard | 1 |
| 1 | eventcategory | login | 1 |
| 1 | eventaction | Einführung in HAnS | 1 |
| 1 | eventaction | success | 1 |
| 1 | timestamp | 2023-03-23 14:20:26 | 1 |
| 1 | timestamp | 2024-06-06 11:21:07 | 1 |
| 1 | timestamp | 2022-12-05 15:33:49 | 1 |
| 1 | timestamp | 2023-10-04 16:19:54 | 1 |
| 1 | subtitle | https://hans.th-nuernberg.de/ | 1 |
| 1 | subtitle | https://hans.th-nuernberg.de/login?evalId=none&user=undefined&role=undefined | 1 |
| 1 | subtitle | Category: ““click_videocard’, Action:”“Einführung in HAnS”” | 1 |
| 1 | subtitle | Category: ““login’, Action:”“success”” | 1 |
Die Visit-Zeit wurde auf 600 Min. trunkiert/begrenzt.
tar_load(time_spent)
tar_load(time_duration)time_spent |>
head(30) |>
kable()| nr | type | value | idvisit | time | time_diff |
|---|---|---|---|---|---|
| 0 | timestamp | 2023-03-23 14:20:12 | 1 | 1679581212 | 47418442 |
| 0 | timestamp | 2024-06-06 11:19:16 | 1 | 1717672756 | 47418442 |
| 0 | timestamp | 2022-12-05 15:33:45 | 1 | 1670254425 | 47418442 |
| 0 | timestamp | 2023-10-04 16:19:46 | 1 | 1696436386 | 47418442 |
| 1 | timestamp | 2023-03-23 14:20:26 | 1 | 1679581226 | 47418442 |
| 1 | timestamp | 2024-06-06 11:21:07 | 1 | 1717672867 | 47418442 |
| 1 | timestamp | 2022-12-05 15:33:49 | 1 | 1670254429 | 47418442 |
| 1 | timestamp | 2023-10-04 16:19:54 | 1 | 1696436394 | 47418442 |
| 2 | timestamp | 2023-03-23 14:20:28 | 1 | 1679581228 | 47418442 |
| 2 | timestamp | 2022-12-05 15:33:49 | 1 | 1670254429 | 47418442 |
| 2 | timestamp | 2023-10-04 16:19:54 | 1 | 1696436394 | 47418442 |
| 3 | timestamp | 2023-03-23 14:20:30 | 1 | 1679581230 | 47418442 |
| 3 | timestamp | 2022-12-05 15:33:49 | 1 | 1670254429 | 47418442 |
| 3 | timestamp | 2023-10-04 16:19:56 | 1 | 1696436396 | 47418442 |
| 4 | timestamp | 2023-03-23 14:20:30 | 1 | 1679581230 | 47418442 |
| 4 | timestamp | 2022-12-05 15:33:52 | 1 | 1670254432 | 47418442 |
| 4 | timestamp | 2023-10-04 16:19:56 | 1 | 1696436396 | 47418442 |
| 5 | timestamp | 2023-03-23 14:20:31 | 1 | 1679581231 | 47418442 |
| 5 | timestamp | 2022-12-05 15:33:52 | 1 | 1670254432 | 47418442 |
| 5 | timestamp | 2023-10-04 16:21:23 | 1 | 1696436483 | 47418442 |
| 6 | timestamp | 2023-03-23 14:20:31 | 1 | 1679581231 | 47418442 |
| 6 | timestamp | 2022-12-05 15:55:43 | 1 | 1670255743 | 47418442 |
| 6 | timestamp | 2023-10-04 16:21:23 | 1 | 1696436483 | 47418442 |
| 7 | timestamp | 2023-03-23 14:20:34 | 1 | 1679581234 | 47418442 |
| 7 | timestamp | 2022-12-05 16:03:01 | 1 | 1670256181 | 47418442 |
| 7 | timestamp | 2023-10-04 16:25:22 | 1 | 1696436722 | 47418442 |
| 8 | timestamp | 2023-03-23 14:21:47 | 1 | 1679581307 | 47418442 |
| 8 | timestamp | 2022-12-05 16:03:06 | 1 | 1670256186 | 47418442 |
| 8 | timestamp | 2023-10-04 16:25:23 | 1 | 1696436723 | 47418442 |
| 9 | timestamp | 2023-03-23 14:22:41 | 1 | 1679581361 | 47418442 |
time_spent <-
time_spent |>
# compute time (t) in minutes (min):
mutate(t_min = as.numeric(time_diff, units = "mins")) |>
filter(t_min < 600)Die Verweildauer ist dargestellt auf Grundlage oben dargestellter Berechnungsgrundlage.
time_spent |>
summarise(
mean_time_diff = round(mean(time_diff), 2),
sd_time_diff = sd(time_diff),
min_time_diff = min(time_diff),
max_time_diff = max(time_diff)
) |>
summarise(
mean_time_diff_avg = mean(mean_time_diff),
sd_time_diff_avg = mean(sd_time_diff, na.rm = TRUE),
min_time_diff_avg = mean(min_time_diff),
max_time_diff_avg = mean(max_time_diff)
) |>
gt() |>
fmt_number(columns = everything(),
decimals = 2)| mean_time_diff_avg | sd_time_diff_avg | min_time_diff_avg | max_time_diff_avg |
|---|---|---|---|
| 142.52 | 0.00 | 142.52 | 142.52 |
visitdurationAlternativ zur Berechnung der Verweildauer steht eine Variable, visitduration zur Verfügung, die (offenbar) die Dauer des Visits misst bzw. messen soll.
Allerdings resultieren substanziell andere Werte, wenn man diese Variable heranzieht zur Berechnung der Verweildauer, vgl. Target time_duration in der Targets-Pipeline.
time_duration |>
head(30) |>
kable()| idvisit | visitduration_sec |
|---|---|
| 17 | 0 |
| 16 | 20 |
| 15 | 3 |
| 13 | 1494 |
| 14 | 0 |
| 11 | 2555 |
| 10 | 79 |
| 12 | 0 |
| 9 | 5 |
| 4 | 5479 |
| 8 | 4 |
| 7 | 0 |
| 6 | 4 |
| 5 | 0 |
| 3 | 0 |
| 1 | 1054 |
| 2 | 115 |
| 38 | 2085 |
| 36 | 1614 |
| 37 | 335 |
| 34 | 3012 |
| 35 | 0 |
| 33 | 1457 |
| 32 | 4023 |
| 31 | 549 |
| 30 | 0 |
| 26 | 4679 |
| 29 | 36 |
| 28 | 820 |
| 27 | 0 |
time_duration |>
summarise(duration_sec_avg = mean(visitduration_sec, na.rm = TRUE)) |>
mutate(duration_min_avg = duration_sec_avg / 60) |>
kable()| duration_sec_avg | duration_min_avg |
|---|---|
| 1289.434 | 21.49056 |
time_spent_summary <-
time_spent |>
summarise(
mean_t_min = mean(t_min),
sd_t_min = sd(t_min),
min_t_min = min(t_min),
max_t_min = max(t_min)
) |>
summarise(
mean_t_min_avg = mean(mean_t_min),
sd_t_min_avg = mean(sd_t_min, na.rm = TRUE),
min_t_min_avg = mean(min_t_min),
max_t_min_avg = mean(max_t_min)
)
time_spent_summary |>
gt() |>
fmt_number(columns = everything(),
decimals = 2)| mean_t_min_avg | sd_t_min_avg | min_t_min_avg | max_t_min_avg |
|---|---|---|---|
| 142.52 | 0.00 | 142.52 | 142.52 |
small_padding_theme <- ggpubr::ttheme(
tbody.style = tbody_style(size = 8), # Smaller font size can help
colnames.style = colnames_style(size = 9, face = "bold"),
padding = unit(c(2, 2), "mm") # Reduce horizontal and vertical padding
)ggpubr::ggtexttable(time_spent_summary,
rows = NULL,
theme = small_padding_theme)time_spent |>
ggplot(aes(x = t_min)) +
geom_histogram() +
scale_x_time() +
theme_minimal() +
labs(y = "n",
x = "Verweildauer in HaNS pro Visit in Minuten")time_spent |>
ggplot(aes(x = t_min)) +
geom_histogram(binwidth = 5) +
theme_minimal() +
labs(y = "n",
x = "Verweildauer in Minuten",
title = "Verweildauer in HaNS pro Visit",
caption = "binwidth = 5 Min.")time_spent2 <-
time_spent |>
filter(t_min > 1, t_min < 120)
time_spent2 |>
ggplot(aes(x = t_min)) +
geom_histogram(binwidth = 10) +
theme_minimal() +
labs(y = "n",
x = "Verweildauer in HaNS pro Visit in Minuten",
title = "Verweildauer begrenzt auf 1-120 Minuten",
caption = "bindwidth = 10 Min.")time_spent_by_month <-
time_spent |>
mutate(date = ymd_hms(value)) |>
mutate(month_start = floor_date(date, "month")) |>
mutate(month_name = month(month_start, label = TRUE, abbr = FALSE),
month_num = month(month_start, label = FALSE),
year = year(month_start)) |>
group_by(month_num, year) |>
summarise(time_spent_month_avg = mean(t_min, na.rm = TRUE),
time_spent_month_sd = sd(t_min, na.rm = TRUE)) |>
arrange(year, month_num)
time_spent_by_month |>
gt() |>
fmt_auto()| year | time_spent_month_avg | time_spent_month_sd |
|---|---|---|
| 12 | ||
| 2,022 | 10 | 0 |
| 2,023 | 268.264 | 196.857 |
| 2,024 | 251.396 | 176.941 |
| 2 | ||
| 2,023 | 421 | 180.428 |
| 2,024 | 309.517 | 202.612 |
| 2,025 | 214.55 | 172.93 |
| 3 | ||
| 2,023 | 107.075 | 86.062 |
| 2,025 | 9 | 0 |
| 4 | ||
| 2,023 | 379.868 | 230.16 |
| 2,024 | 112 | 0 |
| 7 | ||
| 2,023 | 187.708 | 160.938 |
| 2,024 | 566.692 | 50.806 |
| 8 | ||
| 2,023 | 129.368 | 105.653 |
| 2,024 | 582 | 0 |
| 9 | ||
| 2,023 | 414.391 | 208.662 |
| 10 | ||
| 2,023 | 181.716 | 115.475 |
| 2,024 | 43.284 | 12.632 |
| 1 | ||
| 2,024 | 170.763 | 141.968 |
| 2,025 | 336.1 | 199.91 |
| 5 | ||
| 2,024 | 268.909 | 130.843 |
| 6 | ||
| 2,024 | 336.303 | 182.84 |
| 11 | ||
| 2,024 | 374.266 | 209.407 |
time_spent_by_month |>
mutate(time_spent_month_avg = round(time_spent_month_avg, 2),
time_spent_month_sd = round(time_spent_month_sd, 2)) |>
ggtexttable()time_spent_by_month_name <-
time_spent |>
mutate(date = ymd_hms(value)) |>
mutate(month_start = floor_date(date, "month")) |>
mutate(month_name = month(month_start, label = TRUE, abbr = FALSE),
month_num = month(month_start, label = FALSE),
year = year(month_start)) |>
group_by(month_start, year) |>
summarise(time_spent_month_avg = mean(t_min, na.rm = TRUE),
time_spent_month_sd = sd(t_min, na.rm = TRUE))
time_spent_by_month_name |>
ggplot(aes(x = month_start, y = time_spent_month_avg)) +
geom_line(group = 1, color = "grey60") +
geom_point() Was machen die Visitors eigentlich? Und wie oft?
tar_load(count_action_type)Für das Objekt count_action_type wurde die Spalte subtitle in den Langformat-Daten ausgewertet, s. Funktionsdefinition von count_user_action_type.
count_action_type |>
head(30) nr type
1 0 subtitle
2 0 subtitle
3 0 subtitle
4 0 subtitle
5 0 timestamp
6 0 timestamp
7 0 timestamp
8 0 timestamp
9 1 timestamp
10 1 timestamp
11 1 timestamp
12 1 timestamp
13 1 subtitle
14 1 subtitle
15 1 subtitle
16 1 subtitle
17 2 subtitle
18 2 subtitle
19 2 subtitle
20 2 timestamp
21 2 timestamp
22 2 timestamp
23 3 timestamp
24 3 timestamp
25 3 timestamp
26 3 subtitle
27 3 subtitle
28 3 subtitle
29 4 subtitle
30 4 subtitle
value
1 https://hans.th-nuernberg.de/login
2 https://hans.th-nuernberg.de/login?evalId=none&user=undefined&role=undefined
3 https://hans.th-nuernberg.de/
4 https://hans.th-nuernberg.de/login?evalId=none&role=undefined
5 2023-03-23 14:20:12
6 2024-06-06 11:19:16
7 2022-12-05 15:33:45
8 2023-10-04 16:19:46
9 2023-03-23 14:20:26
10 2024-06-06 11:21:07
11 2022-12-05 15:33:49
12 2023-10-04 16:19:54
13 https://hans.th-nuernberg.de/
14 https://hans.th-nuernberg.de/login?evalId=none&user=undefined&role=undefined
15 Category: click_videocard, Action: Einführung in HAnS
16 Category: login, Action: success
17 Category: click_toggle, Action: Evaluation
18 https://hans.th-nuernberg.de/video-player?uuid=b57c1dfe-a667-48a6-b43d-dac7e600ae8c
19 https://hans.th-nuernberg.de/?evalId=none&role=developer
20 2023-03-23 14:20:28
21 2022-12-05 15:33:49
22 2023-10-04 16:19:54
23 2023-03-23 14:20:30
24 2022-12-05 15:33:49
25 2023-10-04 16:19:56
26 Category: click_button, Action: Kanäle
27 Category: videoplayer_click, Action: play
28 Category: click_button, Action: Kanäle
29 https://hans.th-nuernberg.de/channels
30 https://hans.th-nuernberg.de/
idvisit category
1 1 visit_page
2 1 visit_page
3 1 visit_page
4 1 visit_page
5 1 <NA>
6 1 <NA>
7 1 <NA>
8 1 <NA>
9 1 <NA>
10 1 <NA>
11 1 <NA>
12 1 <NA>
13 1 visit_page
14 1 visit_page
15 1 video
16 1 login
17 1 Evaluation
18 1 visit_page
19 1 visit_page
20 1 <NA>
21 1 <NA>
22 1 <NA>
23 1 <NA>
24 1 <NA>
25 1 <NA>
26 1 Kanäle
27 1 video
28 1 Kanäle
29 1 visit_page
30 1 visit_page
Achtung: Es kann sinnvoller sein, alternativ zu dieser Analyse die Analyse auf Basis von eventcategory heranzuziehen. Dort werden alle Arten von Events berücksichtigt. Hier, in der vorliegenden, nur ausgewählte Events.
count_action_type_counted <-
count_action_type |>
drop_na() |>
count(category, sort = TRUE) |>
mutate(prop = round(n/sum(n), 2))
count_action_type_counted |>
gt()| category | n | prop |
|---|---|---|
| video | 657274 | 0.83 |
| click_slideChange | 61934 | 0.08 |
| visit_page | 40549 | 0.05 |
| Media item | 11046 | 0.01 |
| login | 4148 | 0.01 |
| in_media_search | 3044 | 0.00 |
| Search Results Count | 2668 | 0.00 |
| click_topic | 2623 | 0.00 |
| Medien | 1359 | 0.00 |
| logout | 1283 | 0.00 |
| Kanäle | 1222 | 0.00 |
| GESOA | 971 | 0.00 |
| click_channelcard | 661 | 0.00 |
| Evaluation | 173 | 0.00 |
| Data protection | 35 | 0.00 |
count_action_type_per_month <-
count_action_type |>
select(nr, idvisit, category) |>
ungroup() |>
left_join(time_visit_wday |> ungroup()) |>
select(-c(dow, hour, nr)) |>
drop_na() |>
mutate(month_start = floor_date(date_time, "month")) |>
count(month_start, category)count_action_type_per_month |>
gt()| month_start | category | n |
|---|---|---|
| 2022-12-01 | Evaluation | 2 |
| 2022-12-01 | GESOA | 6 |
| 2022-12-01 | Kanäle | 11 |
| 2022-12-01 | Media item | 208 |
| 2022-12-01 | Medien | 12 |
| 2022-12-01 | Search Results Count | 46 |
| 2022-12-01 | click_channelcard | 10 |
| 2022-12-01 | click_slideChange | 337 |
| 2022-12-01 | click_topic | 13 |
| 2022-12-01 | in_media_search | 9 |
| 2022-12-01 | login | 48 |
| 2022-12-01 | logout | 64 |
| 2022-12-01 | video | 2522 |
| 2022-12-01 | visit_page | 423 |
| 2023-01-01 | GESOA | 4 |
| 2023-01-01 | Kanäle | 2 |
| 2023-01-01 | Medien | 2 |
| 2023-01-01 | Search Results Count | 25 |
| 2023-01-01 | click_channelcard | 4 |
| 2023-01-01 | click_slideChange | 438 |
| 2023-01-01 | click_topic | 19 |
| 2023-01-01 | in_media_search | 7 |
| 2023-01-01 | login | 13 |
| 2023-01-01 | logout | 1 |
| 2023-01-01 | video | 3672 |
| 2023-01-01 | visit_page | 179 |
| 2023-02-01 | Data protection | 1 |
| 2023-02-01 | Kanäle | 7 |
| 2023-02-01 | Medien | 10 |
| 2023-02-01 | Search Results Count | 18 |
| 2023-02-01 | click_slideChange | 1823 |
| 2023-02-01 | click_topic | 3 |
| 2023-02-01 | in_media_search | 73 |
| 2023-02-01 | login | 38 |
| 2023-02-01 | logout | 10 |
| 2023-02-01 | video | 10606 |
| 2023-02-01 | visit_page | 491 |
| 2023-03-01 | Data protection | 3 |
| 2023-03-01 | Evaluation | 20 |
| 2023-03-01 | GESOA | 57 |
| 2023-03-01 | Kanäle | 218 |
| 2023-03-01 | Media item | 655 |
| 2023-03-01 | Medien | 215 |
| 2023-03-01 | Search Results Count | 215 |
| 2023-03-01 | click_channelcard | 102 |
| 2023-03-01 | click_slideChange | 1781 |
| 2023-03-01 | click_topic | 109 |
| 2023-03-01 | in_media_search | 86 |
| 2023-03-01 | login | 210 |
| 2023-03-01 | logout | 219 |
| 2023-03-01 | video | 9870 |
| 2023-03-01 | visit_page | 2394 |
| 2023-04-01 | Data protection | 2 |
| 2023-04-01 | Evaluation | 13 |
| 2023-04-01 | GESOA | 70 |
| 2023-04-01 | Kanäle | 181 |
| 2023-04-01 | Media item | 2796 |
| 2023-04-01 | Medien | 187 |
| 2023-04-01 | Search Results Count | 240 |
| 2023-04-01 | click_channelcard | 109 |
| 2023-04-01 | click_slideChange | 5428 |
| 2023-04-01 | click_topic | 203 |
| 2023-04-01 | in_media_search | 100 |
| 2023-04-01 | login | 364 |
| 2023-04-01 | logout | 251 |
| 2023-04-01 | video | 33235 |
| 2023-04-01 | visit_page | 3626 |
| 2023-05-01 | Evaluation | 24 |
| 2023-05-01 | GESOA | 193 |
| 2023-05-01 | Kanäle | 159 |
| 2023-05-01 | Media item | 228 |
| 2023-05-01 | Medien | 132 |
| 2023-05-01 | Search Results Count | 372 |
| 2023-05-01 | click_channelcard | 62 |
| 2023-05-01 | click_slideChange | 8371 |
| 2023-05-01 | click_topic | 445 |
| 2023-05-01 | in_media_search | 608 |
| 2023-05-01 | login | 320 |
| 2023-05-01 | logout | 91 |
| 2023-05-01 | video | 64184 |
| 2023-05-01 | visit_page | 4564 |
| 2023-06-01 | Evaluation | 14 |
| 2023-06-01 | GESOA | 100 |
| 2023-06-01 | Kanäle | 95 |
| 2023-06-01 | Medien | 60 |
| 2023-06-01 | Search Results Count | 337 |
| 2023-06-01 | click_channelcard | 52 |
| 2023-06-01 | click_slideChange | 10802 |
| 2023-06-01 | click_topic | 280 |
| 2023-06-01 | in_media_search | 536 |
| 2023-06-01 | login | 233 |
| 2023-06-01 | logout | 52 |
| 2023-06-01 | video | 70662 |
| 2023-06-01 | visit_page | 3486 |
| 2023-07-01 | Data protection | 5 |
| 2023-07-01 | Evaluation | 28 |
| 2023-07-01 | GESOA | 79 |
| 2023-07-01 | Kanäle | 147 |
| 2023-07-01 | Media item | 68 |
| 2023-07-01 | Medien | 106 |
| 2023-07-01 | Search Results Count | 264 |
| 2023-07-01 | click_channelcard | 68 |
| 2023-07-01 | click_slideChange | 6355 |
| 2023-07-01 | click_topic | 162 |
| 2023-07-01 | in_media_search | 624 |
| 2023-07-01 | login | 262 |
| 2023-07-01 | logout | 141 |
| 2023-07-01 | video | 68992 |
| 2023-07-01 | visit_page | 3784 |
| 2023-08-01 | Evaluation | 1 |
| 2023-08-01 | GESOA | 2 |
| 2023-08-01 | Kanäle | 10 |
| 2023-08-01 | Medien | 8 |
| 2023-08-01 | Search Results Count | 44 |
| 2023-08-01 | click_channelcard | 16 |
| 2023-08-01 | click_slideChange | 301 |
| 2023-08-01 | click_topic | 4 |
| 2023-08-01 | in_media_search | 1 |
| 2023-08-01 | login | 4 |
| 2023-08-01 | logout | 5 |
| 2023-08-01 | video | 2450 |
| 2023-08-01 | visit_page | 199 |
| 2023-09-01 | Evaluation | 5 |
| 2023-09-01 | GESOA | 4 |
| 2023-09-01 | Kanäle | 22 |
| 2023-09-01 | Medien | 10 |
| 2023-09-01 | Search Results Count | 41 |
| 2023-09-01 | click_channelcard | 19 |
| 2023-09-01 | click_slideChange | 109 |
| 2023-09-01 | click_topic | 15 |
| 2023-09-01 | in_media_search | 13 |
| 2023-09-01 | login | 6 |
| 2023-09-01 | logout | 9 |
| 2023-09-01 | video | 3292 |
| 2023-09-01 | visit_page | 267 |
| 2023-10-01 | Kanäle | 7 |
| 2023-10-01 | Media item | 310 |
| 2023-10-01 | Medien | 16 |
| 2023-10-01 | Search Results Count | 5 |
| 2023-10-01 | click_slideChange | 159 |
| 2023-10-01 | click_topic | 21 |
| 2023-10-01 | in_media_search | 5 |
| 2023-10-01 | login | 39 |
| 2023-10-01 | logout | 44 |
| 2023-10-01 | video | 1233 |
| 2023-10-01 | visit_page | 234 |
| 2023-11-01 | Kanäle | 3 |
| 2023-11-01 | Media item | 68 |
| 2023-11-01 | Medien | 1 |
| 2023-11-01 | Search Results Count | 2 |
| 2023-11-01 | click_channelcard | 8 |
| 2023-11-01 | click_slideChange | 98 |
| 2023-11-01 | click_topic | 1 |
| 2023-11-01 | login | 7 |
| 2023-11-01 | logout | 2 |
| 2023-11-01 | video | 1172 |
| 2023-11-01 | visit_page | 76 |
| 2023-12-01 | GESOA | 4 |
| 2023-12-01 | Kanäle | 3 |
| 2023-12-01 | Medien | 34 |
| 2023-12-01 | Search Results Count | 9 |
| 2023-12-01 | click_channelcard | 3 |
| 2023-12-01 | click_slideChange | 884 |
| 2023-12-01 | click_topic | 6 |
| 2023-12-01 | in_media_search | 6 |
| 2023-12-01 | login | 32 |
| 2023-12-01 | logout | 2 |
| 2023-12-01 | video | 8081 |
| 2023-12-01 | visit_page | 373 |
| 2024-01-01 | Evaluation | 23 |
| 2024-01-01 | GESOA | 60 |
| 2024-01-01 | Kanäle | 45 |
| 2024-01-01 | Media item | 144 |
| 2024-01-01 | Medien | 72 |
| 2024-01-01 | Search Results Count | 156 |
| 2024-01-01 | click_channelcard | 14 |
| 2024-01-01 | click_slideChange | 7781 |
| 2024-01-01 | click_topic | 106 |
| 2024-01-01 | in_media_search | 202 |
| 2024-01-01 | login | 317 |
| 2024-01-01 | logout | 92 |
| 2024-01-01 | video | 51149 |
| 2024-01-01 | visit_page | 2830 |
| 2024-02-01 | Evaluation | 1 |
| 2024-02-01 | GESOA | 3 |
| 2024-02-01 | Kanäle | 3 |
| 2024-02-01 | Medien | 3 |
| 2024-02-01 | Search Results Count | 37 |
| 2024-02-01 | click_channelcard | 1 |
| 2024-02-01 | click_slideChange | 786 |
| 2024-02-01 | click_topic | 8 |
| 2024-02-01 | in_media_search | 7 |
| 2024-02-01 | login | 60 |
| 2024-02-01 | logout | 16 |
| 2024-02-01 | video | 12419 |
| 2024-02-01 | visit_page | 448 |
| 2024-03-01 | Data protection | 1 |
| 2024-03-01 | Evaluation | 4 |
| 2024-03-01 | GESOA | 27 |
| 2024-03-01 | Kanäle | 12 |
| 2024-03-01 | Media item | 781 |
| 2024-03-01 | Medien | 21 |
| 2024-03-01 | Search Results Count | 70 |
| 2024-03-01 | click_channelcard | 13 |
| 2024-03-01 | click_slideChange | 526 |
| 2024-03-01 | click_topic | 78 |
| 2024-03-01 | in_media_search | 8 |
| 2024-03-01 | login | 94 |
| 2024-03-01 | logout | 20 |
| 2024-03-01 | video | 6388 |
| 2024-03-01 | visit_page | 924 |
| 2024-04-01 | Data protection | 2 |
| 2024-04-01 | Evaluation | 2 |
| 2024-04-01 | GESOA | 10 |
| 2024-04-01 | Kanäle | 20 |
| 2024-04-01 | Medien | 31 |
| 2024-04-01 | Search Results Count | 84 |
| 2024-04-01 | click_channelcard | 12 |
| 2024-04-01 | click_slideChange | 1727 |
| 2024-04-01 | click_topic | 140 |
| 2024-04-01 | in_media_search | 94 |
| 2024-04-01 | login | 104 |
| 2024-04-01 | logout | 29 |
| 2024-04-01 | video | 17945 |
| 2024-04-01 | visit_page | 1012 |
| 2024-05-01 | Evaluation | 3 |
| 2024-05-01 | GESOA | 20 |
| 2024-05-01 | Kanäle | 31 |
| 2024-05-01 | Medien | 39 |
| 2024-05-01 | Search Results Count | 187 |
| 2024-05-01 | click_channelcard | 16 |
| 2024-05-01 | click_slideChange | 3029 |
| 2024-05-01 | click_topic | 138 |
| 2024-05-01 | in_media_search | 114 |
| 2024-05-01 | login | 195 |
| 2024-05-01 | logout | 41 |
| 2024-05-01 | video | 39557 |
| 2024-05-01 | visit_page | 1987 |
| 2024-06-01 | Data protection | 6 |
| 2024-06-01 | Evaluation | 7 |
| 2024-06-01 | GESOA | 39 |
| 2024-06-01 | Kanäle | 62 |
| 2024-06-01 | Media item | 818 |
| 2024-06-01 | Medien | 84 |
| 2024-06-01 | Search Results Count | 174 |
| 2024-06-01 | click_channelcard | 47 |
| 2024-06-01 | click_slideChange | 4007 |
| 2024-06-01 | click_topic | 200 |
| 2024-06-01 | in_media_search | 109 |
| 2024-06-01 | login | 272 |
| 2024-06-01 | logout | 60 |
| 2024-06-01 | video | 31850 |
| 2024-06-01 | visit_page | 2390 |
| 2024-07-01 | Data protection | 1 |
| 2024-07-01 | Evaluation | 14 |
| 2024-07-01 | GESOA | 114 |
| 2024-07-01 | Kanäle | 52 |
| 2024-07-01 | Medien | 51 |
| 2024-07-01 | Search Results Count | 216 |
| 2024-07-01 | click_channelcard | 25 |
| 2024-07-01 | click_slideChange | 4700 |
| 2024-07-01 | click_topic | 177 |
| 2024-07-01 | in_media_search | 255 |
| 2024-07-01 | login | 216 |
| 2024-07-01 | logout | 48 |
| 2024-07-01 | video | 48145 |
| 2024-07-01 | visit_page | 2320 |
| 2024-08-01 | Medien | 1 |
| 2024-08-01 | click_slideChange | 2 |
| 2024-08-01 | login | 2 |
| 2024-08-01 | video | 184 |
| 2024-08-01 | visit_page | 29 |
| 2024-09-01 | Data protection | 1 |
| 2024-09-01 | click_slideChange | 3 |
| 2024-09-01 | in_media_search | 8 |
| 2024-09-01 | login | 4 |
| 2024-09-01 | video | 454 |
| 2024-09-01 | visit_page | 28 |
| 2024-10-01 | GESOA | 6 |
| 2024-10-01 | click_slideChange | 107 |
| 2024-10-01 | login | 10 |
| 2024-10-01 | logout | 2 |
| 2024-10-01 | video | 380 |
| 2024-10-01 | visit_page | 36 |
| 2024-11-01 | Data protection | 1 |
| 2024-11-01 | GESOA | 28 |
| 2024-11-01 | Kanäle | 10 |
| 2024-11-01 | Medien | 10 |
| 2024-11-01 | Search Results Count | 3 |
| 2024-11-01 | click_channelcard | 3 |
| 2024-11-01 | click_slideChange | 2380 |
| 2024-11-01 | click_topic | 40 |
| 2024-11-01 | in_media_search | 10 |
| 2024-11-01 | login | 127 |
| 2024-11-01 | logout | 8 |
| 2024-11-01 | video | 9909 |
| 2024-11-01 | visit_page | 658 |
| 2024-12-01 | Data protection | 10 |
| 2024-12-01 | Evaluation | 7 |
| 2024-12-01 | GESOA | 86 |
| 2024-12-01 | Kanäle | 65 |
| 2024-12-01 | Media item | 3099 |
| 2024-12-01 | Medien | 92 |
| 2024-12-01 | Search Results Count | 60 |
| 2024-12-01 | click_channelcard | 43 |
| 2024-12-01 | click_topic | 192 |
| 2024-12-01 | in_media_search | 80 |
| 2024-12-01 | login | 495 |
| 2024-12-01 | logout | 28 |
| 2024-12-01 | video | 61655 |
| 2024-12-01 | visit_page | 3057 |
| 2025-01-01 | Data protection | 2 |
| 2025-01-01 | Evaluation | 3 |
| 2025-01-01 | GESOA | 56 |
| 2025-01-01 | Kanäle | 40 |
| 2025-01-01 | Media item | 907 |
| 2025-01-01 | Medien | 146 |
| 2025-01-01 | Search Results Count | 51 |
| 2025-01-01 | click_channelcard | 23 |
| 2025-01-01 | click_topic | 255 |
| 2025-01-01 | in_media_search | 69 |
| 2025-01-01 | login | 568 |
| 2025-01-01 | logout | 42 |
| 2025-01-01 | video | 87736 |
| 2025-01-01 | visit_page | 4055 |
| 2025-02-01 | Evaluation | 2 |
| 2025-02-01 | GESOA | 3 |
| 2025-02-01 | Kanäle | 17 |
| 2025-02-01 | Media item | 610 |
| 2025-02-01 | Medien | 13 |
| 2025-02-01 | Search Results Count | 12 |
| 2025-02-01 | click_channelcard | 11 |
| 2025-02-01 | click_topic | 8 |
| 2025-02-01 | in_media_search | 20 |
| 2025-02-01 | login | 106 |
| 2025-02-01 | logout | 2 |
| 2025-02-01 | video | 9526 |
| 2025-02-01 | visit_page | 655 |
| 2025-03-01 | Media item | 354 |
| 2025-03-01 | Medien | 3 |
| 2025-03-01 | login | 2 |
| 2025-03-01 | logout | 4 |
| 2025-03-01 | video | 6 |
| 2025-03-01 | visit_page | 24 |
tar_load(time_visit_wday)time_visit_wday |>
head(30) |>
kable()| idvisit | dow | hour | date_time |
|---|---|---|---|
| 1 | 4 | 14 | 2023-03-23 14:20:12 |
| 2 | 4 | 14 | 2023-03-23 14:29:42 |
| 3 | 1 | 10 | 2024-03-04 10:25:00 |
| 4 | 4 | 15 | 2023-03-23 15:15:59 |
| 5 | 1 | 16 | 2022-12-05 16:34:34 |
| 6 | 4 | 12 | 2024-06-06 12:05:49 |
| 7 | 1 | 17 | 2022-12-05 17:06:08 |
| 8 | 1 | 13 | 2024-03-04 13:33:11 |
| 9 | 4 | 13 | 2024-06-06 13:30:26 |
| 10 | 4 | 17 | 2023-03-23 17:17:36 |
| 11 | 4 | 17 | 2023-03-23 17:18:10 |
| 12 | 1 | 16 | 2024-03-04 16:47:24 |
| 13 | 4 | 18 | 2023-03-23 18:37:56 |
| 14 | 4 | 15 | 2024-06-06 15:47:37 |
| 15 | 4 | 15 | 2024-06-06 15:57:14 |
| 16 | 4 | 16 | 2024-06-06 16:30:32 |
| 17 | 4 | 17 | 2024-06-06 17:31:43 |
| 18 | 5 | 9 | 2023-03-24 09:23:24 |
| 19 | 4 | 20 | 2024-06-06 20:12:04 |
| 20 | 5 | 9 | 2023-03-24 09:33:57 |
| 21 | 5 | 7 | 2024-06-07 07:56:34 |
| 22 | 2 | 8 | 2024-03-05 08:31:43 |
| 23 | 5 | 9 | 2023-03-24 09:57:44 |
| 24 | 5 | 9 | 2023-03-24 09:57:56 |
| 25 | 5 | 9 | 2023-03-24 09:59:37 |
| 26 | 5 | 10 | 2023-03-24 10:32:49 |
| 27 | 5 | 11 | 2024-06-07 11:09:46 |
| 28 | 5 | 10 | 2023-03-24 10:39:07 |
| 29 | 5 | 11 | 2023-03-24 11:11:53 |
| 30 | 3 | 8 | 2022-12-07 08:35:31 |
count_action_type_per_month_top3 <-
count_action_type |>
select(nr, idvisit, category) |>
ungroup() |>
filter(category %in% c("video", "click_slideChange", "visit_page")) |>
left_join(time_visit_wday |> ungroup()) |>
select(-c(dow, hour, nr)) |>
drop_na() |>
mutate(month_start = floor_date(date_time, "month")) |>
count(month_start, category)count_action_type_per_month_top3 |>
gt()| month_start | category | n |
|---|---|---|
| 2022-12-01 | click_slideChange | 337 |
| 2022-12-01 | video | 2522 |
| 2022-12-01 | visit_page | 423 |
| 2023-01-01 | click_slideChange | 438 |
| 2023-01-01 | video | 3672 |
| 2023-01-01 | visit_page | 179 |
| 2023-02-01 | click_slideChange | 1823 |
| 2023-02-01 | video | 10606 |
| 2023-02-01 | visit_page | 491 |
| 2023-03-01 | click_slideChange | 1781 |
| 2023-03-01 | video | 9870 |
| 2023-03-01 | visit_page | 2394 |
| 2023-04-01 | click_slideChange | 5428 |
| 2023-04-01 | video | 33235 |
| 2023-04-01 | visit_page | 3626 |
| 2023-05-01 | click_slideChange | 8371 |
| 2023-05-01 | video | 64184 |
| 2023-05-01 | visit_page | 4564 |
| 2023-06-01 | click_slideChange | 10802 |
| 2023-06-01 | video | 70662 |
| 2023-06-01 | visit_page | 3486 |
| 2023-07-01 | click_slideChange | 6355 |
| 2023-07-01 | video | 68992 |
| 2023-07-01 | visit_page | 3784 |
| 2023-08-01 | click_slideChange | 301 |
| 2023-08-01 | video | 2450 |
| 2023-08-01 | visit_page | 199 |
| 2023-09-01 | click_slideChange | 109 |
| 2023-09-01 | video | 3292 |
| 2023-09-01 | visit_page | 267 |
| 2023-10-01 | click_slideChange | 159 |
| 2023-10-01 | video | 1233 |
| 2023-10-01 | visit_page | 234 |
| 2023-11-01 | click_slideChange | 98 |
| 2023-11-01 | video | 1172 |
| 2023-11-01 | visit_page | 76 |
| 2023-12-01 | click_slideChange | 884 |
| 2023-12-01 | video | 8081 |
| 2023-12-01 | visit_page | 373 |
| 2024-01-01 | click_slideChange | 7781 |
| 2024-01-01 | video | 51149 |
| 2024-01-01 | visit_page | 2830 |
| 2024-02-01 | click_slideChange | 786 |
| 2024-02-01 | video | 12419 |
| 2024-02-01 | visit_page | 448 |
| 2024-03-01 | click_slideChange | 526 |
| 2024-03-01 | video | 6388 |
| 2024-03-01 | visit_page | 924 |
| 2024-04-01 | click_slideChange | 1727 |
| 2024-04-01 | video | 17945 |
| 2024-04-01 | visit_page | 1012 |
| 2024-05-01 | click_slideChange | 3029 |
| 2024-05-01 | video | 39557 |
| 2024-05-01 | visit_page | 1987 |
| 2024-06-01 | click_slideChange | 4007 |
| 2024-06-01 | video | 31850 |
| 2024-06-01 | visit_page | 2390 |
| 2024-07-01 | click_slideChange | 4700 |
| 2024-07-01 | video | 48145 |
| 2024-07-01 | visit_page | 2320 |
| 2024-08-01 | click_slideChange | 2 |
| 2024-08-01 | video | 184 |
| 2024-08-01 | visit_page | 29 |
| 2024-09-01 | click_slideChange | 3 |
| 2024-09-01 | video | 454 |
| 2024-09-01 | visit_page | 28 |
| 2024-10-01 | click_slideChange | 107 |
| 2024-10-01 | video | 380 |
| 2024-10-01 | visit_page | 36 |
| 2024-11-01 | click_slideChange | 2380 |
| 2024-11-01 | video | 9909 |
| 2024-11-01 | visit_page | 658 |
| 2024-12-01 | video | 61655 |
| 2024-12-01 | visit_page | 3057 |
| 2025-01-01 | video | 87736 |
| 2025-01-01 | visit_page | 4055 |
| 2025-02-01 | video | 9526 |
| 2025-02-01 | visit_page | 655 |
| 2025-03-01 | video | 6 |
| 2025-03-01 | visit_page | 24 |
count_action_type_per_month_top3 |>
ggplot(aes(x = month_start, y = n, color = category, group = category)) +
geom_line()eventcategoryFür folgende Analyse wurde eine andere Variable als oben herangezogen, nämlich eventcategory. Dadurch resultieren etwas andere Ergebnisse.
data_slim_filtered_count <-
data_slim_filtered |>
filter(type == "eventcategory") |>
count(value, sort = TRUE)
data_slim_filtered_count |>
gt()| value | n |
|---|---|
| videoplayer_click | 646885 |
| clear_transcript_text_for_llm_context | 104108 |
| click_slideChange | 61934 |
| click_button | 20409 |
| click_transcript_word | 7882 |
| click_videocard | 7164 |
| verify_option_wrong | 4255 |
| login | 4147 |
| click_topic_position_using_image | 2229 |
| click_in_media_search_results | 1268 |
| in_media_search | 1036 |
| generate_questionaire | 1014 |
| generate_questionaire_interval_minutes | 1014 |
| click_start_resize | 975 |
| click_stop_resize | 942 |
| in_media_search_results | 890 |
| verify_option_correct | 708 |
| click_channelcard | 587 |
| click_videocard_search_lecturer | 537 |
| logout | 519 |
| select_transcript_text_for_llm_context | 468 |
| click_videocard_search_course_acronym | 402 |
| click_option | 338 |
| message_to_llm | 279 |
| message_to_llm_de | 258 |
| eval | 240 |
| llm_response_de | 230 |
| click_topic_position_using_link | 213 |
| press_enter | 211 |
| click_topic_details | 181 |
| click_toggle | 171 |
| click_videocard_search_semester | 104 |
| click_channelcard_search_course_acronym | 80 |
| userRole | 75 |
| click_videocard_search_course | 57 |
| click_channelcard_search_faculty_acronym | 33 |
| click_channelcard_search_lecturer | 33 |
| message_to_llm_en | 27 |
| llm_response_en | 26 |
| click_channelcard_search_course | 20 |
| click_channelcard_search_semester | 10 |
| click_channelcard_search_faculty | 7 |
| click_link | 5 |
| click_survey | 3 |
| click_chat_message_thumbs_down | 1 |
| click_chat_message_thumbs_up | 1 |
data_slim_filtered_count |>
ggtexttable()Als Excel-Datei abspeichern:
data_slim_filtered_count |>
writexl::write_xlsx(path = "obj/data_slim_filtered_count.xlsx")count_action_type |>
count(category, sort = TRUE) |>
ggplot(aes(y = reorder(category, n), x = n)) +
geom_col() +
geom_bar_text() +
labs(
x = "User-Aktion",
y = "Aktion",
title = "Anzahl der User-Aktionen nach Kategorie"
) +
theme_minimal() +
scale_x_continuous(labels = scales::comma)count_action_type |>
count(category, sort = TRUE) |>
ggplot(aes(y = reorder(category, n), x = n)) +
geom_col() +
geom_bar_text() +
labs(
x = "Anazhl der User-Aktionen",
y = "Aktion",
title = "Anzahl der User-Aktionen nach Kategorie",
caption = "Log10-Skala"
) +
theme_minimal() +
scale_x_log10()# Define a vector with the names of the days of the week
# Note: Adjust the start of the week (Sunday or Monday) as per your requirement
days_of_week <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday")
# Replace numbers with day names
time_visit_wday$dow2 <- factor(days_of_week[time_visit_wday$dow],
levels = days_of_week)time_visit_wday |>
as_tibble() |>
count(hour) |>
mutate(prop = n/sum(n)) |>
ggplot(aes(x = hour, y = prop)) +
geom_col() +
theme_minimal() +
labs(
title = "HaNS-Nutzer sind keine Frühaufsteher",
x = "Uhrzeit",
y = "Anteil"
) # coord_polar()time_visit_wday |>
as_tibble() |>
count(hour) |>
mutate(prop = n/sum(n)) |>
ggplot(aes(x = hour, y = prop)) +
geom_col() +
theme_minimal() +
coord_polar()time_visit_wday |>
as_tibble() |>
count(dow2) |>
mutate(prop = n/sum(n)) |>
ggplot(aes(x = dow2, y = prop)) +
geom_col() +
theme_minimal() +
labs(title = "Verteilung der HaNS-Logins nach Wochentagen",
x = "Wochentag",
y = "Anteil") # coord_polar()time_visit_wday |>
as_tibble() |>
count(dow2) |>
mutate(prop = n/sum(n)) |>
ggplot(aes(x = dow2, y = prop)) +
geom_col() +
theme_minimal() +
labs(title = "Verteilung der HaNS-Logins nach Wochentagen",
x = "Wochentag",
y = "Anteil") +
coord_polar()time_visit_wday |>
as_tibble() |>
count(dow2, hour) |>
group_by(dow2) |>
mutate(prop = n/sum(n)) |>
ggplot(aes(x = hour, y = prop)) +
geom_col() +
facet_wrap(~ dow2) +
theme_minimal() +
labs(title = "Verteilung der HaNS-Logins nach Wochentagen und Uhrzeiten",
x = "Wochentag",
y = "Anteil") # coord_polar()time_visit_wday |>
as_tibble() |>
count(dow2, hour) |>
group_by(dow2) |>
mutate(prop = n/sum(n)) |>
ggplot(aes(x = hour, y = prop)) +
geom_col() +
facet_wrap(~ dow2) +
theme_minimal() +
labs(title = "Verteilung der HaNS-Logins nach Wochentagen und Uhrzeiten",
x = "Wochentag",
y = "Anteil") +
coord_polar()time2 <-
time_visit_wday |>
ungroup() |>
mutate(date = as.Date(date_time))
time2 |>
ggplot(aes(x = date, y = hour)) +
geom_bin2d(binwidth = c(1, 1)) + # (1 day, 1 hour)
scale_x_date(date_breaks = "1 month") +
theme(legend.position = "bottom") +
scale_fill_viridis_c() +
labs(caption = "Each x-bin maps to one week")time2 |>
ggplot(aes(x = date, y = hour)) +
geom_bin2d(binwidth = c(7, 1)) + # 1 week, 1 hour
scale_x_date(date_breaks = "1 week", date_labels = "%W") +
theme(legend.position = "bottom") +
scale_fill_viridis_c() +
labs(x = "Week number in 2023/2024",
caption = "Each x-bin maps to one week")time2 |>
ggplot(aes(x = date, y = dow)) +
geom_bin2d(binwidth = c(7, 1)) + # 1 week, 1 hour
scale_x_date(date_breaks = "1 week", date_labels = "%W") +
theme(legend.position = "bottom") +
scale_fill_viridis_c() +
labs(x = "Week number in 2023/2024",
caption = "Each x-bin maps to one week",
y = "Day of Week") +
scale_y_continuous(breaks = 1:7)Berechnungsgrundlage: Für diese Analyse wurden alle Events der Kategorie llm gefiltert.
data_slim_filtered_ai <-
data_slim_filtered |>
filter(type == "eventcategory") |>
filter(str_detect(value, "llm")) |>
count(value, sort = TRUE) |>
mutate(prop = n / sum(n))
data_slim_filtered_ai |>
gt() |>
fmt_auto()| value | n | prop |
|---|---|---|
| clear_transcript_text_for_llm_context | 104,108 | 0.988 |
| select_transcript_text_for_llm_context | 468 | 0.004 |
| message_to_llm | 279 | 0.003 |
| message_to_llm_de | 258 | 0.002 |
| llm_response_de | 230 | 0.002 |
| message_to_llm_en | 27 | 2.562 × 10−4 |
| llm_response_en | 26 | 2.467 × 10−4 |
data_slim_filtered_ai |>
mutate(prop = round(prop, 3)) |>
ggtexttable()data_slim_filtered_llm_interact <-
data_slim_filtered |>
mutate(has_llm = str_detect(value, "llm")) |>
group_by(idvisit) |>
summarise(llm_used_during_visit = any(has_llm == TRUE)) |>
count(llm_used_during_visit) |>
mutate(prop = round(n /sum(n), 2))
data_slim_filtered_llm_interact|>
gt()| llm_used_during_visit | n | prop |
|---|---|---|
| FALSE | 4286 | 0.89 |
| TRUE | 509 | 0.11 |
data_slim_filtered_llm_interact |>
ggtexttable()tar_load(idvisit_has_llm)
idvisit_has_llm |>
head(30) |>
kable()| nr | type | value | idvisit | uses_llm | date_time | dow | hour | year | month | year_month |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | timestamp | 2022-12-05 15:33:45 | 1 | FALSE | 2022-12-05 15:33:45 | 2 | 15 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-05 15:35:45 | 2 | FALSE | 2022-12-05 15:35:45 | 2 | 15 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2024-03-04 10:25:00 | 3 | TRUE | 2024-03-04 10:25:00 | 2 | 10 | 2024 | 3 | 2024-3 |
| 0 | timestamp | 2022-12-05 16:20:05 | 4 | FALSE | 2022-12-05 16:20:05 | 2 | 16 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-05 16:34:34 | 5 | FALSE | 2022-12-05 16:34:34 | 2 | 16 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-05 16:42:53 | 6 | FALSE | 2022-12-05 16:42:53 | 2 | 16 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-05 17:06:08 | 7 | FALSE | 2022-12-05 17:06:08 | 2 | 17 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2024-03-04 13:33:11 | 8 | FALSE | 2024-03-04 13:33:11 | 2 | 13 | 2024 | 3 | 2024-3 |
| 0 | timestamp | 2022-12-05 22:30:01 | 9 | FALSE | 2022-12-05 22:30:01 | 2 | 22 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2023-03-23 17:17:36 | 10 | FALSE | 2023-03-23 17:17:36 | 5 | 17 | 2023 | 3 | 2023-3 |
| 0 | timestamp | 2022-12-06 08:09:01 | 11 | FALSE | 2022-12-06 08:09:01 | 3 | 8 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-06 09:26:13 | 12 | FALSE | 2022-12-06 09:26:13 | 3 | 9 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-06 10:10:10 | 13 | FALSE | 2022-12-06 10:10:10 | 3 | 10 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-06 12:04:24 | 14 | FALSE | 2022-12-06 12:04:24 | 3 | 12 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-06 13:10:12 | 15 | FALSE | 2022-12-06 13:10:12 | 3 | 13 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2024-06-06 16:30:32 | 16 | FALSE | 2024-06-06 16:30:32 | 5 | 16 | 2024 | 6 | 2024-6 |
| 0 | timestamp | 2022-12-06 13:59:27 | 17 | TRUE | 2022-12-06 13:59:27 | 3 | 13 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-06 14:35:57 | 18 | TRUE | 2022-12-06 14:35:57 | 3 | 14 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-06 14:39:14 | 19 | FALSE | 2022-12-06 14:39:14 | 3 | 14 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-06 16:17:59 | 20 | FALSE | 2022-12-06 16:17:59 | 3 | 16 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-06 16:31:52 | 21 | TRUE | 2022-12-06 16:31:52 | 3 | 16 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-06 17:06:34 | 22 | FALSE | 2022-12-06 17:06:34 | 3 | 17 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-06 17:26:27 | 23 | FALSE | 2022-12-06 17:26:27 | 3 | 17 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-06 17:33:51 | 24 | FALSE | 2022-12-06 17:33:51 | 3 | 17 | 2022 | 12 | 2022-12 |
| 1 | timestamp | 2022-12-06 17:33:51 | 24 | FALSE | 2022-12-06 17:33:51 | 3 | 17 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2023-03-24 09:59:37 | 25 | FALSE | 2023-03-24 09:59:37 | 6 | 9 | 2023 | 3 | 2023-3 |
| 0 | timestamp | 2023-03-24 10:32:49 | 26 | FALSE | 2023-03-24 10:32:49 | 6 | 10 | 2023 | 3 | 2023-3 |
| 0 | timestamp | 2022-12-06 20:38:32 | 27 | FALSE | 2022-12-06 20:38:32 | 3 | 20 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-06 20:53:53 | 28 | FALSE | 2022-12-06 20:53:53 | 3 | 20 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-06 22:39:05 | 29 | TRUE | 2022-12-06 22:39:05 | 3 | 22 | 2022 | 12 | 2022-12 |
idvisit_has_llm_timeline <-
idvisit_has_llm |>
count(year_month, uses_llm) |>
ungroup() |>
group_by(year_month) |>
mutate(prop = round(n/sum(n), 2))
idvisit_has_llm_timeline|>
gt()| uses_llm | n | prop |
|---|---|---|
| 2022-12 | ||
| FALSE | 275 | 0.78 |
| TRUE | 79 | 0.22 |
| 2023-1 | ||
| FALSE | 406 | 0.76 |
| TRUE | 126 | 0.24 |
| 2023-10 | ||
| FALSE | 96 | 0.78 |
| TRUE | 27 | 0.22 |
| 2023-11 | ||
| FALSE | 40 | 0.82 |
| TRUE | 9 | 0.18 |
| 2023-12 | ||
| FALSE | 314 | 0.99 |
| TRUE | 3 | 0.01 |
| 2023-2 | ||
| FALSE | 554 | 0.84 |
| TRUE | 108 | 0.16 |
| 2023-3 | ||
| FALSE | 74 | 0.94 |
| TRUE | 5 | 0.06 |
| 2023-4 | ||
| FALSE | 91 | 0.69 |
| TRUE | 40 | 0.31 |
| 2023-5 | ||
| FALSE | 119 | 0.74 |
| TRUE | 41 | 0.26 |
| 2023-6 | ||
| FALSE | 67 | 0.83 |
| TRUE | 14 | 0.17 |
| 2023-7 | ||
| FALSE | 219 | 0.93 |
| TRUE | 16 | 0.07 |
| 2023-8 | ||
| FALSE | 31 | 1.00 |
| 2023-9 | ||
| FALSE | 40 | 1.00 |
| 2024-1 | ||
| FALSE | 887 | 0.95 |
| TRUE | 44 | 0.05 |
| 2024-10 | ||
| FALSE | 9 | 1.00 |
| 2024-11 | ||
| FALSE | 166 | 0.92 |
| TRUE | 15 | 0.08 |
| 2024-12 | ||
| FALSE | 794 | 0.92 |
| TRUE | 67 | 0.08 |
| 2024-2 | ||
| FALSE | 94 | 1.00 |
| 2024-3 | ||
| FALSE | 6 | 0.67 |
| TRUE | 3 | 0.33 |
| 2024-4 | ||
| FALSE | 6 | 0.33 |
| TRUE | 12 | 0.67 |
| 2024-5 | ||
| FALSE | 113 | 1.00 |
| 2024-6 | ||
| FALSE | 70 | 0.91 |
| TRUE | 7 | 0.09 |
| 2024-7 | ||
| FALSE | 23 | 1.00 |
| 2024-8 | ||
| FALSE | 1 | 1.00 |
| 2025-1 | ||
| FALSE | 1008 | 0.94 |
| TRUE | 60 | 0.06 |
| 2025-2 | ||
| FALSE | 166 | 0.96 |
| TRUE | 7 | 0.04 |
| 2025-3 | ||
| FALSE | 2 | 1.00 |
idvisit_has_llm_timeline |>
ggtexttable()idvisit_has_llm |>
count(year_month, uses_llm) |>
ungroup() |>
group_by(year_month) |>
mutate(prop = n/sum(n)) |>
ggplot(aes(x = year_month, y = prop, color = uses_llm, groups = uses_llm)) +
geom_point() +
geom_line(aes(group = uses_llm)) +
labs(title = "Visitors, die mit dem LLM interagieren im Zeitverlauf (Anteile)")idvisit_has_llm |>
count(year_month, uses_llm) |>
ungroup() |>
group_by(year_month) |>
ggplot(aes(x = year_month, y = n, color = uses_llm, groups = uses_llm)) +
geom_point() +
geom_line(aes(group = uses_llm)) +
labs(title = "Visitors, die mit dem LLM interagieren im Zeitverlauf (Anzahl)")data_slim_filtered |>
filter(type == "subtitle") |>
filter(!is.na(value) & value != "") |>
count(click_transcript_word = str_detect(value, "click_transcript_word")) |>
mutate(prop = round(n/sum(n), 2)) |>
gt()| click_transcript_word | n | prop |
|---|---|---|
| FALSE | 910840 | 0.99 |
| TRUE | 7882 | 0.01 |
tar_load(data_long)data_long |>
head(30) idvisit variable value
<char> <char> <char>
1: 13 actiondetails_0_type action
2: 11 actiondetails_0_type action
3: 10 actiondetails_0_type action
4: 4 actiondetails_0_type event
5: 1 actiondetails_0_type action
6: 2 actiondetails_0_type action
7: 38 actiondetails_0_type event
8: 36 actiondetails_0_type event
9: 37 actiondetails_0_type event
10: 34 actiondetails_0_type action
11: 33 actiondetails_0_type event
12: 32 actiondetails_0_type event
13: 31 actiondetails_0_type action
14: 26 actiondetails_0_type action
15: 29 actiondetails_0_type action
16: 28 actiondetails_0_type action
17: 18 actiondetails_0_type event
18: 20 actiondetails_0_type action
19: 25 actiondetails_0_type action
20: 24 actiondetails_0_type action
21: 23 actiondetails_0_type event
22: 44 actiondetails_0_type action
23: 45 actiondetails_0_type action
24: 43 actiondetails_0_type action
25: 42 actiondetails_0_type event
26: 59 actiondetails_0_type event
27: 57 actiondetails_0_type action
28: 52 actiondetails_0_type action
29: 51 actiondetails_0_type event
30: 49 actiondetails_0_type action
idvisit variable value
regex_pattern <- "Category: \"(.*?)(?=', Action)"
# Explaining this regex_pattern:
# Find the literal string
# 1. `Category: ` (surrounded by quotation marks)
# 2. Capture any characters (.*?) that follow, non-greedily, until...
# 3. ...it encounters the literal sequence, ` Action`) immediately after the captured string.
ai_actions_count <-
data_long |>
# slice(1:1000) |>
filter(str_detect(value, "transcript")) |>
mutate(category = str_extract(value, regex_pattern)) |>
select(category) |>
mutate(category = str_replace_all(category, "[\"']", "")) |>
count(category, sort = TRUE)
ai_actions_count |>
gt()| category | n |
|---|---|
| NA | 217059 |
| Category: clear_transcript_text_for_llm_context | 104108 |
| Category: click_transcript_word | 7882 |
| Category: select_transcript_text_for_llm_context | 468 |
| Category: click_button | 20 |
| Category: llm_response_de | 3 |
| Category: llm_response_en | 2 |
Im Objekt wird gezählt, wie oft der String "click_transcript_word" in den Daten (Langformat) gefunden wird, s. Target ai_transcript_clicks_per_month in der Targets-Pipeline.
tar_load(ai_transcript_clicks_per_month)
ai_transcript_clicks_per_month |>
head(30) |>
kable()| nr | type | value | idvisit | clicks_transcript_any | date_time | dow | hour | year | month | year_month |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | timestamp | 2022-12-05 15:33:45 | 1 | FALSE | 2022-12-05 15:33:45 | 2 | 15 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-05 15:35:45 | 2 | FALSE | 2022-12-05 15:35:45 | 2 | 15 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2024-03-04 10:25:00 | 3 | FALSE | 2024-03-04 10:25:00 | 2 | 10 | 2024 | 3 | 2024-3 |
| 0 | timestamp | 2022-12-05 16:20:05 | 4 | FALSE | 2022-12-05 16:20:05 | 2 | 16 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-05 16:34:34 | 5 | FALSE | 2022-12-05 16:34:34 | 2 | 16 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-05 16:42:53 | 6 | TRUE | 2022-12-05 16:42:53 | 2 | 16 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-05 17:06:08 | 7 | FALSE | 2022-12-05 17:06:08 | 2 | 17 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2024-03-04 13:33:11 | 8 | FALSE | 2024-03-04 13:33:11 | 2 | 13 | 2024 | 3 | 2024-3 |
| 0 | timestamp | 2022-12-05 22:30:01 | 9 | FALSE | 2022-12-05 22:30:01 | 2 | 22 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2023-03-23 17:17:36 | 10 | FALSE | 2023-03-23 17:17:36 | 5 | 17 | 2023 | 3 | 2023-3 |
| 0 | timestamp | 2022-12-06 08:09:01 | 11 | FALSE | 2022-12-06 08:09:01 | 3 | 8 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-06 09:26:13 | 12 | FALSE | 2022-12-06 09:26:13 | 3 | 9 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-06 10:10:10 | 13 | TRUE | 2022-12-06 10:10:10 | 3 | 10 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-06 12:04:24 | 14 | FALSE | 2022-12-06 12:04:24 | 3 | 12 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-06 13:10:12 | 15 | FALSE | 2022-12-06 13:10:12 | 3 | 13 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2024-06-06 16:30:32 | 16 | FALSE | 2024-06-06 16:30:32 | 5 | 16 | 2024 | 6 | 2024-6 |
| 0 | timestamp | 2022-12-06 13:59:27 | 17 | FALSE | 2022-12-06 13:59:27 | 3 | 13 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-06 14:35:57 | 18 | FALSE | 2022-12-06 14:35:57 | 3 | 14 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-06 14:39:14 | 19 | TRUE | 2022-12-06 14:39:14 | 3 | 14 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-06 16:17:59 | 20 | FALSE | 2022-12-06 16:17:59 | 3 | 16 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-06 16:31:52 | 21 | TRUE | 2022-12-06 16:31:52 | 3 | 16 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-06 17:06:34 | 22 | FALSE | 2022-12-06 17:06:34 | 3 | 17 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-06 17:26:27 | 23 | TRUE | 2022-12-06 17:26:27 | 3 | 17 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-06 17:33:51 | 24 | FALSE | 2022-12-06 17:33:51 | 3 | 17 | 2022 | 12 | 2022-12 |
| 1 | timestamp | 2022-12-06 17:33:51 | 24 | FALSE | 2022-12-06 17:33:51 | 3 | 17 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2023-03-24 09:59:37 | 25 | FALSE | 2023-03-24 09:59:37 | 6 | 9 | 2023 | 3 | 2023-3 |
| 0 | timestamp | 2023-03-24 10:32:49 | 26 | FALSE | 2023-03-24 10:32:49 | 6 | 10 | 2023 | 3 | 2023-3 |
| 0 | timestamp | 2022-12-06 20:38:32 | 27 | FALSE | 2022-12-06 20:38:32 | 3 | 20 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-06 20:53:53 | 28 | TRUE | 2022-12-06 20:53:53 | 3 | 20 | 2022 | 12 | 2022-12 |
| 0 | timestamp | 2022-12-06 22:39:05 | 29 | FALSE | 2022-12-06 22:39:05 | 3 | 22 | 2022 | 12 | 2022-12 |
ai_transcript_clicks_per_month_count <-
ai_transcript_clicks_per_month |>
count(year_month, clicks_transcript_any) |>
ungroup() |>
group_by(year_month) |>
mutate(prop = round(n/sum(n), 2))
ai_transcript_clicks_per_month_count|>
gt()| clicks_transcript_any | n | prop |
|---|---|---|
| 2022-12 | ||
| FALSE | 245 | 0.69 |
| TRUE | 109 | 0.31 |
| 2023-1 | ||
| FALSE | 303 | 0.57 |
| TRUE | 229 | 0.43 |
| 2023-10 | ||
| FALSE | 98 | 0.80 |
| TRUE | 25 | 0.20 |
| 2023-11 | ||
| FALSE | 41 | 0.84 |
| TRUE | 8 | 0.16 |
| 2023-12 | ||
| FALSE | 270 | 0.85 |
| TRUE | 47 | 0.15 |
| 2023-2 | ||
| FALSE | 479 | 0.72 |
| TRUE | 183 | 0.28 |
| 2023-3 | ||
| FALSE | 58 | 0.73 |
| TRUE | 21 | 0.27 |
| 2023-4 | ||
| FALSE | 93 | 0.71 |
| TRUE | 38 | 0.29 |
| 2023-5 | ||
| FALSE | 105 | 0.66 |
| TRUE | 55 | 0.34 |
| 2023-6 | ||
| FALSE | 61 | 0.75 |
| TRUE | 20 | 0.25 |
| 2023-7 | ||
| FALSE | 150 | 0.64 |
| TRUE | 85 | 0.36 |
| 2023-8 | ||
| FALSE | 27 | 0.87 |
| TRUE | 4 | 0.13 |
| 2023-9 | ||
| FALSE | 33 | 0.82 |
| TRUE | 7 | 0.17 |
| 2024-1 | ||
| FALSE | 765 | 0.82 |
| TRUE | 166 | 0.18 |
| 2024-10 | ||
| FALSE | 9 | 1.00 |
| 2024-11 | ||
| FALSE | 163 | 0.90 |
| TRUE | 18 | 0.10 |
| 2024-12 | ||
| FALSE | 778 | 0.90 |
| TRUE | 83 | 0.10 |
| 2024-2 | ||
| FALSE | 69 | 0.73 |
| TRUE | 25 | 0.27 |
| 2024-3 | ||
| FALSE | 5 | 0.56 |
| TRUE | 4 | 0.44 |
| 2024-4 | ||
| FALSE | 16 | 0.89 |
| TRUE | 2 | 0.11 |
| 2024-5 | ||
| FALSE | 106 | 0.94 |
| TRUE | 7 | 0.06 |
| 2024-6 | ||
| FALSE | 73 | 0.95 |
| TRUE | 4 | 0.05 |
| 2024-7 | ||
| FALSE | 17 | 0.74 |
| TRUE | 6 | 0.26 |
| 2024-8 | ||
| FALSE | 1 | 1.00 |
| 2025-1 | ||
| FALSE | 982 | 0.92 |
| TRUE | 86 | 0.08 |
| 2025-2 | ||
| FALSE | 171 | 0.99 |
| TRUE | 2 | 0.01 |
| 2025-3 | ||
| FALSE | 2 | 1.00 |
ai_transcript_clicks_per_month_count |>
ggtexttable()ai_transcript_clicks_per_month_count |>
mutate(date = ymd(paste0(year_month,"-01"))) |>
ggplot(aes(x = date, y = n)) +
geom_line(group = 1) +
geom_point()Wie viel Zeit verbringen die Nutzer mit dem Betrachten von Videos (“Glotzdauer”)?
Achtung: Die Videozeit ist schwierig auszuwerten. Die Nutzer beenden keine Videos, in dem sie auf “Pause” drücken, sondern indem sie andere Aktionen durchführen. Dies ist aber analytisch schwer abzubilden.
tar_load(glotzdauer)Vgl. die Definition des Targets glotzdauer in der Pipeline.
Kurz gesagt wird die Zeit-Differenz zwischen zwei aufeinander folgenden “Play” und “Pause” Aktionen berechnet.
Allerdings hat dieses Vorgehen Schwierigkeiten: Nicht immer folgt auf einem “Play” ein “Pause”. Es ist schwer auszuwerten, wann die Betrachtung eines Videos endet. Daher ist diese Analyse nur vorsichtig zu interpretieren.
Die Definition der Funktion glotzdauer.R ist online dokumentiert.
glotzdauer |>
head(30) |>
kable()| idvisit | first_play | last_pause | date | time_diff |
|---|---|---|---|---|
| 1 | 2023-03-23 14:22:45 | 2023-10-04 16:40:59 | 2023-03-23 | 16852694 secs |
| 3 | 2024-06-06 11:36:18 | 2024-06-06 11:36:19 | 2024-06-06 | 1 secs |
| 4 | 2023-03-23 15:15:59 | 2023-03-23 15:49:34 | 2023-03-23 | 2015 secs |
| 5 | 2022-12-05 16:35:19 | 2022-12-05 16:35:22 | 2022-12-05 | 3 secs |
| 6 | 2022-12-05 17:26:05 | 2022-12-05 17:26:06 | 2022-12-05 | 1 secs |
| 7 | 2022-12-05 17:06:21 | 2022-12-05 17:06:08 | 2022-12-05 | -13 secs |
| 8 | 2024-03-04 13:33:11 | 2024-03-04 13:33:12 | 2024-03-04 | 1 secs |
| 9 | 2022-12-05 22:33:03 | 2022-12-05 22:32:59 | 2022-12-05 | -4 secs |
| 10 | 2023-03-23 17:18:39 | 2023-03-23 17:17:40 | 2023-03-23 | -59 secs |
| 11 | 2022-12-06 08:14:28 | 2022-12-06 08:14:11 | 2022-12-06 | -17 secs |
| 12 | 2024-06-06 15:42:21 | 2024-06-06 15:42:25 | 2024-06-06 | 4 secs |
| 13 | 2023-03-23 18:37:59 | 2023-03-23 18:38:15 | 2023-03-23 | 16 secs |
| 14 | 2022-12-06 12:22:50 | 2022-12-06 12:21:49 | 2022-12-06 | -61 secs |
| 15 | 2022-12-06 13:12:47 | 2022-12-06 13:12:47 | 2022-12-06 | 0 secs |
| 16 | 2024-06-06 16:30:51 | 2024-06-06 16:32:18 | 2024-06-06 | 87 secs |
| 17 | 2024-06-06 17:32:08 | 2024-10-08 11:23:06 | 2024-06-06 | 10691458 secs |
| 18 | 2023-03-24 09:30:16 | 2023-03-24 09:45:54 | 2023-03-24 | 938 secs |
| 19 | 2022-12-06 14:41:45 | 2022-12-06 14:42:03 | 2022-12-06 | 18 secs |
| 20 | 2022-12-06 16:55:16 | 2022-12-06 16:55:29 | 2022-12-06 | 13 secs |
| 21 | 2024-06-07 07:57:02 | 2024-06-07 07:57:19 | 2024-06-07 | 17 secs |
| 22 | 2022-12-06 17:08:50 | 2022-12-06 17:09:05 | 2022-12-06 | 15 secs |
| 23 | 2024-03-05 09:31:13 | 2023-03-24 09:57:51 | 2023-03-24 | -29979202 secs |
| 24 | 2023-03-24 09:58:17 | 2023-03-24 09:57:56 | 2023-03-24 | -21 secs |
| 25 | 2023-03-24 10:00:03 | 2023-03-24 10:00:16 | 2023-03-24 | 13 secs |
| 26 | 2023-03-24 10:52:58 | NA | 2023-03-24 | NA secs |
| 27 | 2024-06-07 11:13:24 | 2024-06-07 11:13:24 | 2024-06-07 | 0 secs |
| 28 | 2022-12-06 21:11:54 | 2022-12-06 21:12:27 | 2022-12-06 | 33 secs |
| 29 | 2023-03-24 11:12:21 | 2023-03-24 11:12:28 | 2023-03-24 | 7 secs |
| 30 | 2022-12-07 08:35:31 | 2022-12-07 08:35:32 | 2022-12-07 | 1 secs |
| 31 | 2023-03-24 13:26:00 | 2023-03-24 13:34:27 | 2023-03-24 | 507 secs |
Für die folgende Darstellung wurden die absoluten Zeitwerte verwendet, d.h. ohne Vorzeichen.
glotzdauer |>
# we will assume that negative glotzdauer is the as positive glotzdauer:
mutate(time_diff = abs(time_diff)) |>
# without glotzdauer smaller than 10 minutes:
filter(time_diff < 60*10) |>
ggplot(aes(x = time_diff)) +
geom_histogram() +
scale_x_time() +
labs(x = "Time interval [minutes]",
caption = "Only time intervals less than 10 minutes. It is assumed that video time is positive only (no negative time intervals).")glotzdauer_prepped <-
glotzdauer |>
# we will assume that negative glotzdauer is the as positive glotzdauer:
mutate(time_diff_abs_sec = abs(as.numeric(time_diff, units = "secs"))) |>
# without glotzdauer smaller than 10 minutes:
filter(time_diff_abs_sec < 60*10) |>
mutate(time_diff_abs_min = time_diff_abs_sec / 60)
glotzdauer_tbl <-
glotzdauer_prepped |>
select(time_diff_abs_sec, time_diff_abs_min) |>
describe_distribution()
glotzdauer_tbl |>
kable()| Variable | Mean | SD | IQR | Min | Max | Skewness | Kurtosis | n | n_Missing |
|---|---|---|---|---|---|---|---|---|---|
| time_diff_abs_sec | 57.8050366 | 103.166468 | 55.0000000 | 0 | 597.00 | 2.74483 | 7.78949 | 3693 | 0 |
| time_diff_abs_min | 0.9634173 | 1.719441 | 0.9166667 | 0 | 9.95 | 2.74483 | 7.78949 | 3693 | 0 |
glotzdauer_tbl |>
mutate(across(where(is.numeric), ~ round(., 2))) |>
ggpubr::ggtexttable()glotzdauer_prepped_tbl <-
glotzdauer_prepped |>
mutate(first_of_month = floor_date(date, unit = "month")) |>
group_by(first_of_month) |>
summarise(time_diff_mean = mean(time_diff, na.rm = TRUE))
glotzdauer_prepped_tbl |>
kable()| first_of_month | time_diff_mean |
|---|---|
| 2022-12-01 | 22.736364 secs |
| 2023-01-01 | 28.109244 secs |
| 2023-02-01 | 15.815451 secs |
| 2023-03-01 | 47.571429 secs |
| 2023-04-01 | 44.633588 secs |
| 2023-05-01 | 46.980582 secs |
| 2023-06-01 | 52.568750 secs |
| 2023-07-01 | 35.906250 secs |
| 2023-08-01 | 109.500000 secs |
| 2023-09-01 | 23.733333 secs |
| 2023-10-01 | 23.500000 secs |
| 2023-11-01 | 27.833333 secs |
| 2023-12-01 | 8.804196 secs |
| 2024-01-01 | 46.468619 secs |
| 2024-02-01 | 18.714286 secs |
| 2024-03-01 | 16.774193 secs |
| 2024-04-01 | 30.059701 secs |
| 2024-05-01 | 43.555556 secs |
| 2024-06-01 | 53.278846 secs |
| 2024-07-01 | 55.058824 secs |
| 2024-08-01 | -24.000000 secs |
| 2024-09-01 | -9.000000 secs |
| 2024-10-01 | 36.750000 secs |
| 2024-11-01 | 47.181818 secs |
| 2024-12-01 | 60.773707 secs |
| 2025-01-01 | 52.115732 secs |
| 2025-02-01 | 14.147059 secs |
| 2025-03-01 | 5.000000 secs |
glotzdauer_prepped_tbl |>
ggplot(aes(x = first_of_month, y = time_diff_mean)) +
geom_line()